/* 
 * st_nocs.c
 */

/* 
 * Copyright (c) 1999 by Kenjiro Taura, Akinori Yonezawa. All rights reserved.
 * Copyright (c) 1999 by Yoshihiro Oyama, Toshio Endo. All rights reserved.
 * Copyright (c) 1999 by Kunio Tabata. All rights reserved.
 * Copyright (c) 1999 by Mitsubishi Research Institute.  All rights reserved.
 * Copyright (c) 1999 by Information-technology Promotion Agency.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose,  provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

/* this file contains C procedures that must be compiled without using
   any callee-save registers (use gcc's -ffixed-... options).
   we have no provisions for other compilers for now. */

#include <st.h>
#include "st_int.h"

/* Unwind stack through N fork points and jump there.
   During unwinding SP is kept pointing to the top of the stack. 
   Stack is then expanded by the size of the max SP-relative area.

   the typical situation is the following, although we assume neither 
   that SP and FP point to the same frame, nor unwound frames are in
   the same stack.


				   SP-> +---------------+
   					|max SP relative|
   SP-> +---------------+	   	| area of the   |
	| ST_UNWIND 	|		| target frame 	|
   FP-> +---------------+	   	+---------------+
        |               |               |               |
	+---------------+               +---------------+

        	...				...
	
	+---------------+		+---------------+
	| target frame	|		| target frame	|
	| (nth fork 	|		| (nth fork 	|
	| 	point)	|		| 	point)	|
	+---------------+	   FP->	+---------------+
     
     before:				after: (registers hold values for the 
                                        target frame)
     

   c->fixed_pc, c->fixed_fp, and c->regs have been filled (by
   asm_capture_context). c->rap and c->pfpp have not been filled yet
   and st_unwind will fill them by the locations of the return address
   and parent's FP of the bottom most frame.


   this procedure must not use callee save registers because within
   this procedure, the control goes back and forth between this
   procedure and epilogue code of other procedures.

 */

/* is_local_frame(FP, TOP): non-zero when
       tls(stack_bottom) SP_LEQ FP  and  FP SP_LT TOP,
   with all three values compared as uslong.  SP_LEQ / SP_LT are infix
   comparison macros defined elsewhere (presumably ordering addresses in
   the direction of stack growth -- confirm in st_int.h).

   Both parameters are parenthesized before the cast so that a compound
   argument (e.g. `p + 1' or `a ? b : c') is cast as a whole rather than
   having the cast bind to its first operand only.
   Note that FP is evaluated twice; do not pass an expression with side
   effects. */
#define is_local_frame(fp, top) \
((uslong)tls(stack_bottom) SP_LEQ (uslong)(fp) \
 && (uslong)(fp) SP_LT (uslong)(top))


/* st_unwind(N, C): unwind the stack through N fork points and jump to
   the Nth one.

   N : number of fork points to unwind through.  With N <= 0 the outer
       loop body never runs, so PC is still st_unwind and the last_*
       values are still 0 when the final jump is made; ST_DBG builds
       detect this ("bomb") and call st_app_die(1).  Callers are
       therefore expected to pass N >= 1.
   C : context being captured.  c->fixed_sp is read here; c->bottom_fp,
       c->rap, c->pfpp, c->return_displacement, c->invalid_frames_top,
       c->invalid_frames_bottom (and c->bottom_proc_name under ST_DBG)
       are written here, after which c->valid is set to 1.

   For every frame from this procedure's own frame down to the Nth fork
   point, the frame's epilogue (pi->pure_epilogue) is executed with the
   frame's return-address slot and parent-FP slot temporarily
   overwritten so that control comes back into this procedure; the two
   slots are restored immediately afterwards.

   The procedure ends with asm_set_fp_sp_and_jmp, so it presumably never
   returns to its caller in the ordinary way. */
GLOBAL void st_unwind(int n, st_context_t c)
{
#if ST_ASSERT_ON
  void * this_sp;		/* stack pointer of this procedure */
#endif /* ST_ASSERT_ON */
  void * this_fp = asm_get_fp(); /* frame pointer of this procedure 
				   (st_unwind) */
  void * fp = this_fp;		/* frame pointer of the frame being watched */
  void * pc = (void *)st_unwind; /* program counter being watched */
  void ** last_rap = 0;		/* return address location of the
				   lastly examined frame. 
				initialize to make gcc happy */
  void ** last_pfpp = 0;	/* parent FP location of lastly 
				   examined frame. initialize to make gcc
				happy */
  void * last_fp = 0;		/* FP of the lastly examined frame */
  short last_return_displacement = 0; /* return displacement of lastly
					 examined frame. initialize to make
				      gcc happy */

#if ST_DBG
  char * last_proc_name = 0;	/* initialize to make gcc happy */
#endif /* ST_DBG */

  /* procedure information of the frame being watched; initially that
     of st_unwind itself */
  st_proc_info_t pi = st_get_proc_info(pc, 0, 0);
  int i;

  /* the first invalid frame encountered */
  invalid_frame_desc_t invalid_frames_top = 0;
  /* the last invalid frame encountered. 0 if we have encountered no 
     invalid frames */
  invalid_frame_desc_t invalid_frames_bottom = 0;

  /* sanity check on the profiler state: while profiling is being done,
     st_unwind must be entered in one of the states switch, serv_steal,
     or exiting.  otherwise print the offending state and fail the
     assertion. */
#if ST_PROF && ST_DBG && ST_ASSERT_ON
  if (tls(profile) && ST_INT_LOC_CHECK(&tls(wg)->profiling, profiling_doing)) {
    prof_state_t s = tls(profile)->current_state;
    if (s != prof_state_switch 
	&& s != prof_state_serv_steal
	&& s != prof_state_exiting) {
      fprintf(st_errout, "%ld : prof_state = %s\n", 
	      tls(worker_id), prof_state_string[s]);
      st_assert(s == prof_state_switch 
		|| s == prof_state_serv_steal
		|| s == prof_state_exiting);
    }
}
#endif /* ST_PROF && ST_DBG && ST_ASSERT_ON */

  /* by the way, we can skip some frames if we know will encounter 
     a frame established by restart_context, because all callee-save
     registers are available for it. someday we would like to implement 
     it. */
  /* NOTE: the disabled block below is an unfinished sketch; it is not
     valid C (empty subscript, empty while condition) and only survives
     because it is excluded by #if 0. */
#if 0
  /* The skip function mentioned above is under construction. */
  if(tss(global_options)->stack_unwind_optimization){
    do{
    /* restore callee saves */
    asm_fix_callee_saves();
    /* we don't really return to invalid frames, so keep invalid frame queue */
    //    tls(fixed_invalid_frames) = tls(fixed_invalid_frames)->next;
    tls(fixed_invalid_frames)->fixed_cs_regs[];
      }  while();
  }
#endif /* 0 */

  /* we make sure that at the beginning of each iteration, SP points
     to the same place */
#if ST_ASSERT_ON
  this_sp = asm_get_sp();
#endif /* ST_ASSERT_ON */

  /* unwind stack until N fork points are found */
  for (i = 0; i < n; i++) {
    /* unwind stack until a fork point is found */
    int is_fork;		/* 1 if the current procedure is forked */
    do {
      /* return address location (i.e., *rap == return address) 
	 of the frame currently watched */
      void ** rap = (void **)(fp + pi->return_address_offset);
      /* parent FP pointer location of the frame currently watched*/
      void ** pfpp = (void **)(fp + pi->parent_fp_offset);

      /* execute the epilogue code of the current frame and 
	 return to this point. to return to this point, we overwrite
	 return address location (rap) and parent FP location (pfpp) 
	 of the currently examined frame and jump into the epilogue code. 
      */
      /* get epilogue code address of the current frame */
      void * eca = (void *)pi->pure_epilogue;
      /* displacement handed to asm_set_fp_and_jmp_link together with
	 rap; it is the difference between VOID_RETURN_DISPLACEMENT and
	 this procedure's own return displacement */
      short ra_disp = VOID_RETURN_DISPLACEMENT - pi->return_displacement;

      void * orig_ra;		/* saved contents of *rap */
      void * orig_pfp;		/* saved contents of *pfpp */

      st_assert(this_sp == asm_get_sp());
      /* both offsets must be known for this procedure */
      st_assert(pi->return_address_offset != -1);
      st_assert(pi->parent_fp_offset != -1);

      if (st_is_invalid_call_site(pc - JUMP_AND_LINK_DISPLACEMENT, pi)) {
	/* if the call site address corresponding to PC calls restart_context, 
	   callee save registers do not hold values that the caller of
	   restart_thread (the current parent of the procedure described by 
	   pi) expects. we fix this. */
	invalid_frame_desc_t top_if = tls(fixed_invalid_frames);
	asm_restore_callee_saves(top_if->fixed_cs_regs);
	/* record the first/last cell encountered in the chain */
	if (invalid_frames_top == 0) invalid_frames_top = top_if;
	invalid_frames_bottom = top_if;
	/* remove the cell from the chain */
	tls(fixed_invalid_frames) = top_if->next;
      } 

      /* when this frame is in the local stack, i.e.
	 is_local_frame(fp, this_fp) holds (FP is between
	 tls(stack_bottom) and this_fp), we export it.
	 NOTE(review): an earlier form of this comment also described an
	 "above previous frame (FP > last_fp)" condition and a special
	 case for the first iteration; the code below does not consult
	 last_fp, so only the locality test applies. */
      if (is_local_frame(fp, this_fp)) {
#if ST_DBG
	st_export_frame(fp, rap, pi->base_sp_minus_fp, pfpp, pi->name);
#else  /* ST_DBG */
	st_export_frame(fp, rap, pi->base_sp_minus_fp);
#endif /* ST_DBG */
      }

      /* save its original return address and parent FP */
      orig_ra = *rap;
      orig_pfp = *pfpp;

      /* set the fake parent FP 
	 so that when control returns to this procedure, fp is set to this_fp 
      */ 
      *pfpp = (void*)this_fp;

      /* execute epilogue code and return to this point again */
      asm_set_fp_and_jmp_link(fp, eca, rap, (long)ra_disp);

      /* control returns here after epilogue. here FP points to this
	 (st_unwind's) frame, SP to top of the stack, and other callee
	 save registers usually hold values for currently watched
	 frame. 

	 this procedure must not use callee save registers. values of
	 callee-save registers change across asm_set_fp_and_jmp_link */ 

      /* restore the original return address and parent FP */
      *rap = orig_ra;		
      *pfpp = orig_pfp;

      /* record last FP, RAP, PFPP, and return_displacement */
      last_fp = fp;
      last_rap = rap;
      last_pfpp = pfpp;
      last_return_displacement = pi->return_displacement;
#if ST_DBG
      last_proc_name = pi->name;
#endif /* ST_DBG */

      /* next program counter and frame pointer: move to the parent
	 frame */
      pc = orig_ra;
      fp = orig_pfp;

      /* look up the parent's procedure information by the call site
	 address (return address minus JUMP_AND_LINK_DISPLACEMENT) and
	 test whether that call site is a fork point */
      pi = st_get_proc_info(pc - JUMP_AND_LINK_DISPLACEMENT, 0, pi);
      is_fork = st_is_fork_point(pc - JUMP_AND_LINK_DISPLACEMENT, pi);
    } while (is_fork == 0);	/* end do */
    /* st_get_catch_block(pc - JUMP_AND_LINK_DISPLACEMENT): that is, 
       call address is the fork point */
#define REWOPTIM 0
#if REWOPTIM
      /* stack rewind optimization hacking */
      /* jump and exec PROC_FORK_X directly here */
#endif /* REWOPTIM */
  } /* end for */

  {
    /* we have done unwinding. now jump to the handler */
    void * new_sp = c->fixed_sp;
    /* record where does the bottom-most frame stores the link to parent
       (parent FP and return address) */
    c->bottom_fp = last_fp;
    c->rap = last_rap;
    c->pfpp = last_pfpp;
    c->return_displacement = last_return_displacement;

    /* record chain of invalid frames we saw in the context. */
    /* we do not have to do this, probably. just in case. */
    /* the chain is terminated at the last cell we removed from
       tls(fixed_invalid_frames), detaching it from whatever is left
       on the global chain */
    if (invalid_frames_bottom) {
      invalid_frames_bottom->next = 0;
    } else {
      st_assert(invalid_frames_top == 0);
    }
    c->invalid_frames_top = invalid_frames_top;
    c->invalid_frames_bottom = invalid_frames_bottom;

#if ST_DBG
    c->bottom_proc_name = last_proc_name;
#endif /* ST_DBG */

    /* now it is safe for somebody to schedule C. */
    /* the write barrier orders all the stores into C above before the
       store to c->valid; st_restart_context_n spins on c->valid and
       issues MEMBAR_READ_READ before reading the other fields */
    MEMBAR_WRITE_WRITE();
    c->valid = 1;		/* say C is now valid */

    /* here, PC points to the point where Nth most recent fork was made. 
       callee save registers hold values at that location, except for
       FP, which still points to this frame.  now we adjust stack, set
       FP for the target frame and jump into the corresponding catch
       block. */

#if ST_DBG
    /* PC unchanged from its initial value means no frame was unwound
       (n <= 0); there is nowhere sensible to jump to */
    if (pc == (void *)st_unwind) {
      fprintf(st_errout, "%ld : bomb\n", tls(thread_id));
      st_app_die(1);
    }
#endif /* ST_DBG */

#if ST_PROF
    st_prof_busy();
#endif /* ST_PROF */
    /* SP is reset to the value of SP at the point where C was captured,
       then adjusted by GROW_STACK2 using the SP-relative area sizes
       (see the picture in the comment preceding this procedure). */
    new_sp = GROW_STACK2(new_sp, 
			 tss(max_sp_relative_size) - tss(min_sp_relative_size),
			 tss(max_sp_shrink_size));
    /* set FP and SP for the target frame and jump to the return point
       of the fork, displaced by the return displacement of the last
       unwound procedure */
    asm_set_fp_sp_and_jmp(fp, new_sp, pc + last_return_displacement);
    /* + KEEP_STACK_RETURN_DISPLACEMENT was here */
  }
}

/* restart execution from the point where C was captured (more precisely,
   restart execution as if the call to capture_context that captured C 
   returned 1). the continuation of C is set to the caller of this
   RESTART_THREAD (that is, the control returns to the caller of 
   RESTART_THREAD after C finishes) */

/* st_restart_context_n(C, IFF, N): unwind N frames starting from this
   procedure's own frame, link the frame reached that way as the parent
   of C's bottom frame, and install C.

   C   : context to restart.  this procedure spins until c->valid is
         non-zero, then reads c->n_threads, c->invalid_frames_top,
         c->invalid_frames_bottom, c->bottom_fp, c->pfpp, c->rap and
         c->return_displacement, and clears the two invalid_frames
         fields.
   IFF : invalid frame descriptor.  the callee-save registers in effect
         after unwinding are stored into iff->fixed_cs_regs and IFF is
         pushed on tls(fixed_invalid_frames).
   N   : number of frames to unwind.  only the last of them may end at
         a fork point (asserted).

   The procedure ends with asm_install_context(c); it presumably does
   not return to its caller in the ordinary way. */
USED_BY_MACRO void st_restart_context_n(st_context_t c, invalid_frame_desc_t iff, int n)
{
  /* modify return address and parent link of this thread (the bottom
     frame). the link is established as if the caller of resume_thread
     (call it A) called the resumed thread in the first place. simply
     installing the return address to the place where A called
     resume_thread would not work, because the callee-save registers
     would be invalid. hence, return address is to stub that restores
     correct callee-save registers. */

  void * this_sp;		/* stack pointer of this procedure */
  void * this_fp = asm_get_fp(); /* frame pointer of this procedure 
				   (st_restart_context_n) */
  void * fp = this_fp;		/* frame pointer of the frame being watched */
  void * pc = (void *)st_restart_context_n; /* program counter being watched */
  /* procedure information of the frame being watched; initially that
     of st_restart_context_n itself */
  st_proc_info_t pi = st_get_proc_info(pc, 0, 0);

  int i;

  /* when profiling, this procedure must be entered in state switch or
     serv_steal */
#if ST_PROF
  st_assert(tls(profile) == 0 
	    || tls(profile)->current_state == prof_state_switch
	    || tls(profile)->current_state == prof_state_serv_steal);
#endif /* ST_PROF */

  /* we save the value of SP just before the loop, so that we are
     sure that the value of SP must be the same as the saved SP value */
  this_sp = asm_get_sp();

  /* unwind N frames */
  for (i = 0; i < n; i++) {
    /* return address location (i.e., *rap == return address) 
       of the frame currently watched */
    void ** rap = (void **)(fp + pi->return_address_offset);
    /* parent FP pointer location of the frame currently watched*/
    void ** pfpp = (void **)(fp + pi->parent_fp_offset);

    /* execute the epilogue code of the current frame and 
       return to this point. to return to this point, we overwrite
       return address location (rap) and parent FP location (pfpp) 
       of the currently examined frame and jump into the epilogue code. */
    /* get epilogue code address of the current frame */
    void * eca = (void *)pi->pure_epilogue;
    /* displacement handed to asm_set_fp_and_jmp_link together with rap */
    short ra_disp = VOID_RETURN_DISPLACEMENT - pi->return_displacement;

    void * orig_ra;		/* saved contents of *rap */
    void * orig_pfp;		/* saved contents of *pfpp */

    st_assert(this_sp == asm_get_sp());
    /* both offsets must be known for this procedure */
    st_assert(pi->return_address_offset != -1);
    st_assert(pi->parent_fp_offset != -1);

    /* unlike st_unwind, no invalid call site is expected among the
       frames unwound here */
    st_assert(!st_is_invalid_call_site(pc - JUMP_AND_LINK_DISPLACEMENT, pi));
    /* save its original return address and parent FP */
    orig_ra = *rap;
    orig_pfp = *pfpp;

    /* set the fake parent FP 
       so that when control returns to this procedure, fp is set to this_fp */ 
    *pfpp = (void*)this_fp;

    /* execute epilogue code and return to this point again */
    asm_set_fp_and_jmp_link(fp, eca, rap, (long)ra_disp);

    /* control returns here after epilogue. here FP points to this
       (restart_context's) frame, SP to top of the stack, and other callee
       save registers hold values for currently watched frame. 

       this procedure must not use callee save registers. values of
       callee-save registers change across asm_set_fp_and_jmp_link */ 
    
    /* restore the original return address and parent FP */
    *rap = orig_ra;		
    *pfpp = orig_pfp;
    
    /* next program counter and frame pointer: move to the parent frame */
    pc = orig_ra;
    fp = orig_pfp;
    
    /* look up the parent's procedure information by its call site
       address. only the last unwound frame may have been called from a
       fork point. */
    pi = st_get_proc_info(pc - JUMP_AND_LINK_DISPLACEMENT, 0, pi);
    st_assert(i + 1 == n 
	      || !st_is_fork_point(pc - JUMP_AND_LINK_DISPLACEMENT, pi));
  } /* end for */

  /* here FP points to the frame that must be linked from the bottom
     frame of the restarted context. PI points to the procedure information
     of that frame (pointed to by FP). Callee-save registers hold values
     for that frame. */
  {
    /* area that receives the callee save registers */
    uslong * cs = iff->fixed_cs_regs;
#if CS_ASSUMES_DW_ALIGNED
    /* the save area must be 8-byte aligned */
    st_assert(((uslong)cs & 7) == 0);
#endif  /* CS_ASSUMES_DW_ALIGNED */
    
    /* save callee save registers */
    asm_save_callee_saves(cs);
    
    /* here C must be valid */
    /* busy-wait until st_unwind has finished filling C and set
       c->valid; the read barrier orders the reads of C's other fields
       below after this test.
       NOTE(review): this relies on c->valid being re-read on every
       iteration (e.g. a volatile field) -- confirm in its declaration. */
    while (c->valid == 0);
    MEMBAR_READ_READ();
    
    /* because we push threads in this context on top of the stack,
       and the context represents c->n_threads threads, it seems more 
       reasonable to add c->n_threads to tls(n_stacked_threads). however, 
       restart_context itself MERGES the bottom thread into the current 
       thread (caller). thus, we gain N threads, but, at the same time, 
       lose 1 thread. usually, restart_context itself is called by FORK_PROC, 
       so we effectively gain N threads. */
    /* NOTE(review): the comment above speaks of tls(n_stacked_threads)
       while the counter updated below is tls(n_total_threads) --
       confirm which one is intended. */

    tls(n_total_threads) += (c->n_threads - 1);
    
    /* push this iff on top of the global chain of invalid frames */
    iff->next = tls(fixed_invalid_frames);
    tls(fixed_invalid_frames) = iff;

    /* in addition, push the chain of invalid frames on top of the 
       global chain of invalid frames */
    /* i.e. the chain recorded in C (top ... bottom) is spliced in
       front of IFF, so C's cells are popped before IFF */
    if (c->invalid_frames_bottom) {
      c->invalid_frames_bottom->next = tls(fixed_invalid_frames);
      tls(fixed_invalid_frames) = c->invalid_frames_top;
    } else {
      st_assert(c->invalid_frames_top == 0);
    }

#if 1				/* probably unnecessary */
    c->invalid_frames_top = 0;
    c->invalid_frames_bottom = 0;
#endif /* 1 */

    /* export the frame that becomes the parent of C's bottom frame
       when it is a local frame (relative to this_sp) and either C's
       bottom frame is not local or FP is strictly beyond
       tls(stack_bottom) under SP_GT */
    if (is_local_frame(fp, this_sp) &&
	((!is_local_frame(c->bottom_fp, this_sp)) ||
	 (uslong)fp SP_GT (uslong)tls(stack_bottom))) {
      st_export_frame(fp, (void**)(fp + pi->return_address_offset), 
		      pi->base_sp_minus_fp
#if ST_DBG
		      , (void**)(fp + pi->parent_fp_offset), pi->name
#endif /* ST_DBG */
		      );
    }

    /* install the link to parent FP. */
    /* c->pfpp and c->rap are the slots recorded by st_unwind for C's
       bottom frame; make them point at the frame found above and at
       its return point, adjusted by the return displacements */
    *c->pfpp = fp;
    *c->rap = pc + VOID_RETURN_DISPLACEMENT - c->return_displacement;

#if ST_PROF
    st_prof_busy();
#endif /* ST_PROF */
    /* goto the resumed thread and return to the caller */
    asm_extend_stack(tss(max_sp_relative_size) - tss(min_sp_relative_size) 
		     + tss(max_sp_shrink_size));
    asm_install_context(c);
  }
}


