Tail self-call optimization in generated C code?

Tue Aug 22 14:52:22 UTC 2017

Le 2017-08-22 10:48, Daniel Kochmański a écrit :
> Hey,
> 
> from compiler sources (see src/cmp/cmpexit.lsp for functions which
> determine if TSC is possible):
> 
> ;;; Tail-recursion optimization for a function F is possible only if
> ;;;     1. F receives only required parameters, and
> ;;;     2. no required parameter of F is enclosed in a closure.
> ;;;
> ;;; A recursive call (F e1 ... en) may be replaced by a loop only if
> ;;;     1. F is not declared as NOTINLINE,
> ;;;     2. n is equal to the number of required parameters of F,
> ;;;     3. the form is a normal function call (i.e. args are not 
> ARGS-PUSHED),
> ;;;     4. (F e1 ... en) is not surrounded by a form that causes 
> dynamic
> ;;;        binding (such as LET, LET*, PROGV),
> ;;;     5. (F e1 ... en) is not surrounded by a form that that pushes a 
> frame
> ;;;        onto the frame-stack (such as BLOCK and TAGBODY whose tags 
> are
> ;;;        enclosed in a closure, and CATCH),

Thanks a lot! The function did use quite a few let and let*, and the 
problem was there. I reorganized the code, and the optimization does 
happen now.

> Best regards,
> 
> Daniel
> 
> 
> On 22.08.2017 10:41, Eric Brunel wrote:
>> I thought it would work, but I still have a problem: I reorganized the 
>> code so that there aren't any &optional parameters in any of my 
>> functions and it worked quite well: the generated C code for almost 
>> all of them do optimize the tail self-call... except one, which is of 
>> course the biggest and most complicated one. I double-checked it 
>> thoroughly, and I can't find any reason why it wouldn't work: it is 
>> properly tail-recursive, and actually, every recursive call in the 
>> generated C code is immediately followed by a 'return'.
>> 
>> Now I noticed that for every function, the generated C code starts 
>> with something like:
>> 
>> {
>>  /* Some declarations... */
>>  const cl_env_ptr cl_env_copy = ecl_process_env();
>>  cl_object value0;
>>  ecl_cs_check(cl_env_copy,value0);
>>  {
>> TTL:
>> 
>> the TTL label being used for the tail-call optimization. This happens 
>> even if the function is not recursive at all, in which case the TTL 
>> label is never used.
>> 
>> But for the function that doesn't work, the 'TTL:' line is not even 
>> there. So there must be something in the Lisp function that prevents 
>> the TCO from happening. Is there a document somewhere that describes 
>> how the TCO is done, and in which cases it cannot be done? That would 
>> help me a lot to figure out what's happening here and how to fix it...
>> 
>> 
>> 
>> Le 2017-08-21 15:14, Eric Brunel a écrit :
>>> Le 2017-08-21 14:55, PR a écrit :
>>>> 2017-08-21 14:11 GMT+02:00, Eric Brunel <eric.brunel at pragmadev.com>:
>>>>> Hello all,
>>>>> 
>>>>> I'm trying to get ECL to optimize tail self-calls in the C code
>>>>> generated from the Lisp files and it looks like I'm missing 
>>>>> something,
>>>>> because I can't find a way to do that.
>>>> 
>>>> Here is an example that works (not written by me, it's from 
>>>> cliki.net):
>>>> 
>>>> (defun fib (n)
>>>>   "Tail-recursive computation of the nth element."
>>>>   (check-type n (integer 0 *))
>>>>   (labels ((fib-aux (n f1 f2)
>>>>              (if (zerop n)
>>>>                  f1
>>>>                  (fib-aux (1- n) f2 (+ f1 f2)))))
>>>>     (fib-aux n 0 1)))
>>>> 
>>>> The generated C code uses goto, as you can see:
>>>> 
>>>> static cl_object LC1fib_aux(cl_object v1n, cl_object v2f1, cl_object 
>>>> v3f2)
>>>> {
>>>>  cl_object env0;
>>>>  const cl_env_ptr cl_env_copy = ecl_process_env();
>>>>  cl_object value0;
>>>>  ecl_cs_check(cl_env_copy,value0);
>>>>  {
>>>> TTL:
>>>>   if (!(ecl_zerop(v1n))) { goto L1; }
>>>>   value0 = v2f1;
>>>>   cl_env_copy->nvalues = 1;
>>>>   return value0;
>>>> L1:;
>>>>   v1n = ecl_one_minus(v1n);
>>>>   {
>>>>    cl_object v4;
>>>>    v4 = v3f2;
>>>>    v3f2 = ecl_plus(v2f1,v3f2);
>>>>    v2f1 = v4;
>>>>   }
>>>>   goto TTL;
>>>>  }
>>>> }
>>>> 
>>> 
>>> It does indeed, and I think I got it: the optional parameters to my
>>> function seem to prevent the optimization from happening. If I 
>>> rewrite
>>> my function as:
>>> 
>>> (defun enumerate-aux (l index result)
>>>   (cond
>>>     ((null l) (reverse result))
>>>     (t (enumerate-aux (cdr l) (+ index 1) (cons (list index (car l)) 
>>> result)))
>>>   )
>>> )
>>> (defun enumerate (l) (enumerate-aux l 0 nil))
>>> 
>>> the generated code for the enumerate-aux function is as expected:
>>> 
>>> static cl_object L1enumerate_aux(cl_object v1l, cl_object v2index,
>>> cl_object v3result)
>>> {
>>>  cl_object T0, T1;
>>>  cl_object env0;
>>>  const cl_env_ptr cl_env_copy = ecl_process_env();
>>>  cl_object value0;
>>>  ecl_cs_check(cl_env_copy,value0);
>>>  {
>>> TTL:
>>>   if (!(v1l==ECL_NIL)) { goto L1; }
>>>   value0 = cl_reverse(v3result);
>>>   return value0;
>>> L1:;
>>>   {
>>>    cl_object v4;
>>>    v4 = ecl_cdr(v1l);
>>>    {
>>>     cl_object v5;
>>>     v5 = ecl_plus(v2index,ecl_make_fixnum(1));
>>>     T0 = ecl_car(v1l);
>>>     T1 = cl_list(2, v2index, T0);
>>>     v3result = CONS(T1,v3result);
>>>     v2index = v5;
>>>     v1l = v4;
>>>    }
>>>   }
>>>   goto TTL;
>>>  }
>>> }
>>> 
>>> Not sure I understand why, but I have a solution.
>>> 
>>> Thank you very much!
>>>  - Eric -
>>> 
>>> 
>>>> 
>>>> Paul
>>>> 
>>>> 
>>>>> 
>>>>> Here is the behavior I'm getting for this simple Lisp function:
>>>>> 
>>>>> (defun enumerate (l &optional (index 0) (result nil))
>>>>>    (cond
>>>>>      ((null l) (reverse result))
>>>>>      (t (enumerate (cdr l) (+ index 1) (cons (list index (car l))
>>>>> result)))
>>>>>    )
>>>>> )
>>>>> 
>>>>> As far as I can see, this function is properly tail-recursive, so I
>>>>> expected the generated C code to take the into account and generate 
>>>>> a
>>>>> goto instead of a recursive call. But what I'm getting is this:
>>>>> 
>>>>> static cl_object L1enumerate(cl_narg narg, cl_object v1l, ...)
>>>>> {
>>>>>   cl_object T0, T1, T2, T3, T4;
>>>>>   cl_object env0;
>>>>>   const cl_env_ptr cl_env_copy = ecl_process_env();
>>>>>   cl_object value0;
>>>>>   ecl_cs_check(cl_env_copy,value0);
>>>>>   if (ecl_unlikely(narg<1)) FEwrong_num_arguments_anonym();
>>>>>   if (ecl_unlikely(narg>3)) FEwrong_num_arguments_anonym();
>>>>>   {
>>>>>    cl_object v2index;
>>>>>    cl_object v3result;
>>>>>    va_list args; va_start(args,v1l);
>>>>>    {
>>>>>     int i = 1;
>>>>>     if (i >= narg) {
>>>>>      v2index = ecl_make_fixnum(0);
>>>>>     } else {
>>>>>      i++;
>>>>>      v2index = va_arg(args,cl_object);
>>>>>     }
>>>>>     if (i >= narg) {
>>>>>      v3result = ECL_NIL;
>>>>>     } else {
>>>>>      i++;
>>>>>      v3result = va_arg(args,cl_object);
>>>>>     }
>>>>>    }
>>>>>    va_end(args);
>>>>>    if (!(v1l==ECL_NIL)) { goto L3; }
>>>>>    value0 = cl_reverse(v3result);
>>>>>    return value0;
>>>>> L3:;
>>>>>    T0 = ecl_cdr(v1l);
>>>>>    T1 = ecl_plus(v2index,ecl_make_fixnum(1));
>>>>>    T2 = ecl_car(v1l);
>>>>>    T3 = cl_list(2, v2index, T2);
>>>>>    T4 = CONS(T3,v3result);
>>>>>    value0 = L1enumerate(3, T0, T1, T4);
>>>>>    return value0;
>>>>>   }
>>>>> }
>>>>> 
>>>>> The recursive call is generated as a recursive call in C too...
>>>>> 
>>>>> I'm almost sure I've seen C code generated from ECL that generated 
>>>>> self
>>>>> tail calls as goto's, and I know it works when using the bytecode, 
>>>>> where
>>>>> you just have to compile the function. But my search for a way to
>>>>> trigger this behavior for the generated C code has been 
>>>>> unsuccessful so
>>>>> far.
>>>>> 
>>>>> I'm using ECL 16.1.3. I used "ecl -c ... --compile ..." to get the 
>>>>> C
>>>>> code, but my main build uses asdf:make-build with an ASD file.
>>>>> 
>>>>> 
>>