Tail self-call optimization in generated C code?

Tue Aug 22 08:48:53 UTC 2017

Hey,

from compiler sources (see src/cmp/cmpexit.lsp for functions which 
determine if TSC is possible):

;;; Tail-recursion optimization for a function F is possible only if
;;;     1. F receives only required parameters, and
;;;     2. no required parameter of F is enclosed in a closure.
;;;
;;; A recursive call (F e1 ... en) may be replaced by a loop only if
;;;     1. F is not declared as NOTINLINE,
;;;     2. n is equal to the number of required parameters of F,
;;;     3. the form is a normal function call (i.e. args are not 
ARGS-PUSHED),
;;;     4. (F e1 ... en) is not surrounded by a form that causes dynamic
;;;        binding (such as LET, LET*, PROGV),
;;;     5. (F e1 ... en) is not surrounded by a form that that pushes a 
frame
;;;        onto the frame-stack (such as BLOCK and TAGBODY whose tags are
;;;        enclosed in a closure, and CATCH),

Best regards,

Daniel

On 22.08.2017 10:41, Eric Brunel wrote:
> I thought it would work, but I still have a problem: I reorganized the 
> code so that there aren't any &optional parameters in any of my 
> functions and it worked quite well: the generated C code for almost 
> all of them do optimize the tail self-call... except one, which is of 
> course the biggest and most complicated one. I double-checked it 
> thoroughly, and I can't find any reason why it wouldn't work: it is 
> properly tail-recursive, and actually, every recursive call in the 
> generated C code is immediately followed by a 'return'.
>
> Now I noticed that for every function, the generated C code starts 
> with something like:
>
> {
>  /* Some declarations... */
>  const cl_env_ptr cl_env_copy = ecl_process_env();
>  cl_object value0;
>  ecl_cs_check(cl_env_copy,value0);
>  {
> TTL:
>
> the TTL label being used for the tail-call optimization. This happens 
> even if the function is not recursive at all, in which case the TTL 
> label is never used.
>
> But for the function that doesn't work, the 'TTL:' line is not even 
> there. So there must be something in the Lisp function that prevents 
> the TCO from happening. Is there a document somewhere that describes 
> how the TCO is done, and in which cases it cannot be done? That would 
> help me a lot to figure out what's happening here and how to fix it...
>
>
>
> Le 2017-08-21 15:14, Eric Brunel a écrit :
>> Le 2017-08-21 14:55, PR a écrit :
>>> 2017-08-21 14:11 GMT+02:00, Eric Brunel <eric.brunel at pragmadev.com>:
>>>> Hello all,
>>>>
>>>> I'm trying to get ECL to optimize tail self-calls in the C code
>>>> generated from the Lisp files and it looks like I'm missing something,
>>>> because I can't find a way to do that.
>>>
>>> Here is an example that works (not written by me, it's from cliki.net):
>>>
>>> (defun fib (n)
>>>   "Tail-recursive computation of the nth element."
>>>   (check-type n (integer 0 *))
>>>   (labels ((fib-aux (n f1 f2)
>>>              (if (zerop n)
>>>                  f1
>>>                  (fib-aux (1- n) f2 (+ f1 f2)))))
>>>     (fib-aux n 0 1)))
>>>
>>> The generated C code uses goto, as you can see:
>>>
>>> static cl_object LC1fib_aux(cl_object v1n, cl_object v2f1, cl_object 
>>> v3f2)
>>> {
>>>  cl_object env0;
>>>  const cl_env_ptr cl_env_copy = ecl_process_env();
>>>  cl_object value0;
>>>  ecl_cs_check(cl_env_copy,value0);
>>>  {
>>> TTL:
>>>   if (!(ecl_zerop(v1n))) { goto L1; }
>>>   value0 = v2f1;
>>>   cl_env_copy->nvalues = 1;
>>>   return value0;
>>> L1:;
>>>   v1n = ecl_one_minus(v1n);
>>>   {
>>>    cl_object v4;
>>>    v4 = v3f2;
>>>    v3f2 = ecl_plus(v2f1,v3f2);
>>>    v2f1 = v4;
>>>   }
>>>   goto TTL;
>>>  }
>>> }
>>>
>>
>> It does indeed, and I think I got it: the optional parameters to my
>> function seem to prevent the optimization from happening. If I rewrite
>> my function as:
>>
>> (defun enumerate-aux (l index result)
>>   (cond
>>     ((null l) (reverse result))
>>     (t (enumerate-aux (cdr l) (+ index 1) (cons (list index (car l)) 
>> result)))
>>   )
>> )
>> (defun enumerate (l) (enumerate-aux l 0 nil))
>>
>> the generated code for the enumerate-aux function is as expected:
>>
>> static cl_object L1enumerate_aux(cl_object v1l, cl_object v2index,
>> cl_object v3result)
>> {
>>  cl_object T0, T1;
>>  cl_object env0;
>>  const cl_env_ptr cl_env_copy = ecl_process_env();
>>  cl_object value0;
>>  ecl_cs_check(cl_env_copy,value0);
>>  {
>> TTL:
>>   if (!(v1l==ECL_NIL)) { goto L1; }
>>   value0 = cl_reverse(v3result);
>>   return value0;
>> L1:;
>>   {
>>    cl_object v4;
>>    v4 = ecl_cdr(v1l);
>>    {
>>     cl_object v5;
>>     v5 = ecl_plus(v2index,ecl_make_fixnum(1));
>>     T0 = ecl_car(v1l);
>>     T1 = cl_list(2, v2index, T0);
>>     v3result = CONS(T1,v3result);
>>     v2index = v5;
>>     v1l = v4;
>>    }
>>   }
>>   goto TTL;
>>  }
>> }
>>
>> Not sure I understand why, but I have a solution.
>>
>> Thank you very much!
>>  - Eric -
>>
>>
>>>
>>> Paul
>>>
>>>
>>>>
>>>> Here is the behavior I'm getting for this simple Lisp function:
>>>>
>>>> (defun enumerate (l &optional (index 0) (result nil))
>>>>    (cond
>>>>      ((null l) (reverse result))
>>>>      (t (enumerate (cdr l) (+ index 1) (cons (list index (car l))
>>>> result)))
>>>>    )
>>>> )
>>>>
>>>> As far as I can see, this function is properly tail-recursive, so I
>>>> expected the generated C code to take the into account and generate a
>>>> goto instead of a recursive call. But what I'm getting is this:
>>>>
>>>> static cl_object L1enumerate(cl_narg narg, cl_object v1l, ...)
>>>> {
>>>>   cl_object T0, T1, T2, T3, T4;
>>>>   cl_object env0;
>>>>   const cl_env_ptr cl_env_copy = ecl_process_env();
>>>>   cl_object value0;
>>>>   ecl_cs_check(cl_env_copy,value0);
>>>>   if (ecl_unlikely(narg<1)) FEwrong_num_arguments_anonym();
>>>>   if (ecl_unlikely(narg>3)) FEwrong_num_arguments_anonym();
>>>>   {
>>>>    cl_object v2index;
>>>>    cl_object v3result;
>>>>    va_list args; va_start(args,v1l);
>>>>    {
>>>>     int i = 1;
>>>>     if (i >= narg) {
>>>>      v2index = ecl_make_fixnum(0);
>>>>     } else {
>>>>      i++;
>>>>      v2index = va_arg(args,cl_object);
>>>>     }
>>>>     if (i >= narg) {
>>>>      v3result = ECL_NIL;
>>>>     } else {
>>>>      i++;
>>>>      v3result = va_arg(args,cl_object);
>>>>     }
>>>>    }
>>>>    va_end(args);
>>>>    if (!(v1l==ECL_NIL)) { goto L3; }
>>>>    value0 = cl_reverse(v3result);
>>>>    return value0;
>>>> L3:;
>>>>    T0 = ecl_cdr(v1l);
>>>>    T1 = ecl_plus(v2index,ecl_make_fixnum(1));
>>>>    T2 = ecl_car(v1l);
>>>>    T3 = cl_list(2, v2index, T2);
>>>>    T4 = CONS(T3,v3result);
>>>>    value0 = L1enumerate(3, T0, T1, T4);
>>>>    return value0;
>>>>   }
>>>> }
>>>>
>>>> The recursive call is generated as a recursive call in C too...
>>>>
>>>> I'm almost sure I've seen C code generated from ECL that generated 
>>>> self
>>>> tail calls as goto's, and I know it works when using the bytecode, 
>>>> where
>>>> you just have to compile the function. But my search for a way to
>>>> trigger this behavior for the generated C code has been 
>>>> unsuccessful so
>>>> far.
>>>>
>>>> I'm using ECL 16.1.3. I used "ecl -c ... --compile ..." to get the C
>>>> code, but my main build uses asdf:make-build with an ASD file.
>>>>
>>>>
>