changeset 15:e395e387bd3c

FIX: now catching precursor signs of lock-up bug correctly ENHANCEMENT: As a side effect, mlWSAlloc and mlExec are faster.
author samer
date Thu, 02 Feb 2012 12:57:15 +0000
parents e1f87438e34c
children f2a0f5eaaaa5
files README cpp/plml.cpp prolog/plml.pl
diffstat 3 files changed, 88 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/README	Thu Feb 02 03:01:20 2012 +0000
+++ b/README	Thu Feb 02 12:57:15 2012 +0000
@@ -280,6 +280,28 @@
 	Removed unused Matlab functions from matlab/general.
 	Version 1!
 
+2012-02
+	Some changes to mlExec and mlWSAlloc to get around a problem that
+	was bedevilling the triserver project, which was using a Matlab
+	engine in a separate thread, receiving requests on a message queue,
+	and calling this library with call_with_time_limit. Something to
+	do with this combination (threads, signals etc) was causing the
+	Prolog system to lock up hard on returning to Prolog system
+	after a failing call to engGetVariable (which was getting stuck and
+	timing out).
+
+	I still don't know what causes the lock-up, but I was able to detect
+	some signs that it was coming *before* calling engGetVariable by looking
+	at the Matlab engine output buffer.
+
+	Eventual work around is not to use engGetVariable to get uniquevar
+	variable names (mlWSAlloc) or lasterr (mlExec) at all, but simply
+	to scrape these out of the output buffer. This seems to be a few
+	milliseconds faster too.
+
+	other changes: removed ml_debug/1 - would rather have a flag to
+	enable printfs in the C++ source.
+
 -------------------------------------------------------------------------
 ACKNOWLEDGMENTS
 
--- a/cpp/plml.cpp	Thu Feb 02 03:01:20 2012 +0000
+++ b/cpp/plml.cpp	Thu Feb 02 12:57:15 2012 +0000
@@ -82,14 +82,21 @@
 #include <unistd.h>
 #include "engine.h"
 
+
+#define ALT_LASTERR 1
+#define ALT_WSALLOC 1
+
 /* The maximum number of simultaneous connections to Matlab from one
    Prolog process. */
 #define MAXENGINES 4    
 #define BUFSIZE  32768 // buffer for matlab output
 #define MAXCMDLEN 256
-// #define EVALFMT "t__ex=[];\ntry\n%s\ncatch t__ex\ndisp(getReport(t__ex))\nend"
-//#define EVALFMT "lasterr('');disp('#');%s\nt__ex=lasterr;"
-#define EVALFMT "lasterr('');disp('#');%s"
+
+#ifdef ALT_LASTERR
+#	define EVALFMT "lasterr('');disp('#');%s"
+#else
+#	define EVALFMT "lasterr('');disp('#');%s\nt__ex=lasterr;"
+#endif
 
 using namespace std;
 
@@ -508,11 +515,29 @@
 
   //printf("-- Entering mlWSALLOC         \r"); fflush(stdout); 
   struct wsvar x;
-  int rc;
 
   x.engine = engine->ep;
   x.id     = engine->id;
 
+#ifdef ALT_WSALLOC
+  // printf("-- mlWSAlloc: Calling uniquevar...       \r"); fflush(stdout); 
+  if (engEvalString(x.engine, "uniquevar([])")) {
+	  return raise_exception("mlWSAlloc: Cannot execute uniquevar");
+  }
+ 
+  if (strncmp(engine->outbuf,">> \nans =\n\nt_",13)!=0) {
+     //printf("\n** mlWSAlloc: output buffer looks bad: '%s'\n",engine->outbuf);
+	  return raise_exception("mlWSAlloc: Bad output buffer after uniquevar.");
+  }
+
+	int len=strlen(engine->outbuf+11)-2;
+	if (len+1>sizeof(x.name)) {
+	  return raise_exception("mlWSAlloc: uniquevar name is too long.");
+	 }
+	memcpy(x.name,engine->outbuf+11,len);
+	x.name[len]=0;
+#else
+ 
   // printf("-- mlWSAlloc: Calling uniquevar...       \r"); fflush(stdout); 
   if (engEvalString(x.engine, "t__0=uniquevar([])")) {
 	  return raise_exception("mlWSAlloc: Cannot execute uniquevar");
@@ -528,11 +553,12 @@
 	  return raise_exception("mlWSAlloc: Cannot get new variable name.");
   }
   memset(x.name,sizeof(x.name),0);
-  rc = mxGetString(newname,x.name, sizeof(x.name));
+  int rc = mxGetString(newname,x.name, sizeof(x.name));
   mxDestroyArray(newname);
   if (rc) {
 	  return raise_exception("mlWSAlloc: Cannot read new variable name.");
   }
+#endif
 
   return PL_unify_blob(blob,&x,sizeof(x),&ws_blob);
 }
@@ -585,7 +611,6 @@
   try {
     eng *eng=findEngine(engine);
 	 const char *cmdstr=PlTerm(cmd);
-	 char *eval_cmd;
 	 int	cmdlen=strlen(cmdstr);
 	 int	rc;
     
@@ -598,39 +623,38 @@
 		cmdlen=strlen(cmdstr);
 	 }
 
-	 eval_cmd = new char[cmdlen+strlen(EVALFMT)-1];
-	 if (eval_cmd==NULL) throw PlException("Failed to allocate memory for command");
-	 sprintf(eval_cmd, EVALFMT, cmdstr);
-	 //printf("-- Calling Matlab engine...                 \r"); fflush(stdout);
-	 rc=engEvalString(eng->ep,eval_cmd);
-	 //printf("-- Returned from Matlab engine...            \r"); fflush(stdout);
-	 delete [] eval_cmd;
+	 {  // scope for eval_cmd
+		 char *eval_cmd = new char[cmdlen+strlen(EVALFMT)-1];
+	 	 if (eval_cmd==NULL) throw PlException("Failed to allocate memory for command");
+	 	 sprintf(eval_cmd, EVALFMT, cmdstr);
+	 	 //printf("-- Calling Matlab engine...                 \r"); fflush(stdout);
+	 	 rc=engEvalString(eng->ep,eval_cmd);
+	 	 //printf("-- Returned from Matlab engine...            \r"); fflush(stdout);
+	 	 delete [] eval_cmd;
+	}
+
+    if (rc) { throw PlException("mlExec: engEvalString failed."); }
 
 	 // EVALFMT starts with disp('#'). This means that the output buffer should
-	 // contain at least the 5 characters: ">> *\n". If they are not there,
+	 // contain at least the 5 characters: ">> #\n". If they are not there,
 	 // something is terribly wrong and we must throw an exeption to avoid
-	 // locking up.
+	 // locking up in triserver.
     if (strncmp(eng->outbuf,">> #\n",5)!=0) {
 		 //printf("\n** mlExec: Output buffer looks bad: '%s'.\n",eng->outbuf);
 		 throw PlException("mlExec: output buffer looks bad.");
 	 }
-    if (rc) {
-		 // printf("\n** mlExec: evaluation error. Output buffer contains:\n");
-		 throw PlException("mlExec: engEvalString failed.");
-	 }
-	 // write whatever is in the output buffer now.
+	 // write whatever is in the output buffer now, starting after the "#\n"
     fputs(eng->outbuf+5,stdout);
 
-	 if (PL_handle_signals()<0) {
-		 printf("\n** mlExec: evaluation interrupted by signal.\n");
-		 return FALSE;
-	 }
+	
+#ifdef ALT_LASTERR
+
+	 // --------------- ALTERNATIVE LASTERR SCHEME ------------------
+	 // call engine to eval lasterr, then scrape from output buffer
+	 // it's faster and easier and not prone to engGetVariable lock up problem!
 
 	 rc=engEvalString(eng->ep,"lasterr");
-	 if (rc) {
-		 //printf("\n** mlExec: unable to execute lasterr.\n");
-		 throw PlException("mlExec: unable to execute lasterr");
-	 }
+	 if (rc) { throw PlException("mlExec: unable to execute lasterr"); }
 	 if (strncmp(eng->outbuf,">> \nans =",9)!=0) {
 		 //printf("\n** mlExec: Bad output buffer post lasterr: '%s'.\n",eng->outbuf);
 		 throw PlException("mlExec: bad output buffer post lasterr");
@@ -638,22 +662,28 @@
 
 	 if (strncmp(eng->outbuf+11,"     ''",7)!=0) {
 		int len=strlen(eng->outbuf+11)-2;
-		char *lasterr=(char *)malloc(len+1);
+		char *lasterr= new char[len+1];
 		term_t desc=PL_new_term_ref();
 		term_t cmd=PL_new_term_ref();
 		term_t ex=PL_new_term_ref();
+
 		memcpy(lasterr,eng->outbuf+11,len);
 		lasterr[len]=0;
 
 		PL_put_atom_chars(desc,lasterr);
 		PL_put_atom_chars(cmd,cmdstr);
-		free(lasterr);
+		delete [] lasterr;
+
 		check(PL_cons_functor(ex,mlerror,engine,desc,cmd));
 		throw PlException(ex);
     }
 
+#else
 
-/*
+	 // --------------- ORIGINAL LASTERR SCHEME ------------------
+	 // Execution puts lasterr into t__ex, then we use engGetVariable to
+	 // retrieve it.
+
 	 //printf(" - mlExec: output buffer: '%s'\n",eng->outbuf);
     mxArray *lasterr = engGetVariable(eng->ep, "t__ex");
 	 //printf(" - mlExec: output buffer after: ++%s++\n",eng->outbuf);
@@ -665,8 +695,8 @@
 		 throw PlException("mlExec: unable to get lasterr");
 	 }
     
-    if (mxGetNumberOfElements(lasterr)>0) {
-		 //char *string=mxArrayToString(mxGetField(lasterr,0,"message"));
+    if (mxGetNumberOfElements(lasterr)==0) mxDestroyArray(lasterr);
+	 else {
 		 char *string=mxArrayToString(lasterr);
 	    mxDestroyArray(lasterr);
 
@@ -680,14 +710,10 @@
 		check(PL_cons_functor(ex,mlerror,engine,desc,cmd));
 		throw PlException(ex);
 		 
-	 } else mxDestroyArray(lasterr);
-*/
-    
-    // if we've got this far, then everything went well, so
-	 //printf("                                        \r"); fflush(stdout); 
+	 } 
+#endif
 	 return TRUE;
   } catch (PlException &e) { 
-	 //printf("\n** mlExec: Throwing exception\n");
     return e.plThrow(); 
   }
 }
--- a/prolog/plml.pl	Thu Feb 02 03:01:20 2012 +0000
+++ b/prolog/plml.pl	Thu Feb 02 12:57:15 2012 +0000
@@ -22,7 +22,6 @@
 
 	,	term_mlstring/3   % (+Id, +Expr, -String)  ~Prolog term to Matlab string
 	,	term_texatom/2		% (+Expr, -Atom)         ~Prolog term to TeX expression
-	,	ml_debug/1        % (+Bool)
 	,	wsvar/3      		% (+WSBlob, -Name, -Id)
 
 	% MATBASE
@@ -286,7 +285,7 @@
 %	    matlab_path/2 and matlab_init/2 clauses. Otherwise, do run them.
 %    * debug(In,Out)
 %      if present, Matlab is started in a script which captures standard 
-%      input and output to files In and Out respectively.
+%      input and output to files In and Out respectively. (tbd)
 %
 %	[What if session is already open and attached to Id?]
 
@@ -328,7 +327,6 @@
 %  Execute Matlab expression without returning any values.
 ml_exec(Id,X)  :- 
 	term_mlstring(Id,X,C), !, 
-	(plml_flag(debug,true) -> format('ml_exec(~w):~s\n',[Id,C]); true),
 	mlEXEC(Id,C).
 
 %% ml_eval(+Id:ml_eng, +Expr:ml_expr, +Types:list(type), -Res:list(ml_val)) is det.
@@ -390,11 +388,6 @@
 ??? Q   :- ml_test(ml,Q).
 
 
-%% ml_debug(+Flag:boolean) is det.
-%  Set or reset debug state. =|ml_debug(true)|= causes formatted Matlab
-%  statements to be printed before being sent to Matlab engine.
-ml_debug(F) :- set_flag(debug,F).
-
 /*
  * DCG for term to matlab conversion
  *  the big problem with Matlab syntax is that you cannot always replace