/* 
 * st_int.h
 */

/* 
 * Copyright (c) 1999 by Kenjiro Taura, Akinori Yonezawa. All rights reserved.
 * Copyright (c) 1999 by Yoshihiro Oyama, Toshio Endo. All rights reserved.
 * Copyright (c) 1999 by Kunio Tabata. All rights reserved.
 * Copyright (c) 1999 by Mitsubishi Research Institute.  All rights reserved.
 * Copyright (c) 1999 by Information-technology Promotion Agency.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose,  provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */

#ifndef __ST_INT_H__
#define __ST_INT_H__

#include <assert.h>

/* select the underlying thread package.
   the first macro that is defined among st_solaris_thread,
   st_pthread / st_old_pthread, st_sfork_thread, st_nt_thread and
   st_no_thread decides which branch is taken; if none of them is
   defined, compilation stops with #error.
   each branch includes the header(s) of that package and defines
   EXPLICIT_CONCURRENCY. CONCURRENCY_CAN_DECREASE is defined only in
   the branches that set EXPLICIT_CONCURRENCY to 1. */
#if defined(st_solaris_thread)
/* tls fix */
#include <thread.h>
/* 1 if a thread creation needs to adjust concurrency explicitly.
   typically true on user-level thread libraries */
#define EXPLICIT_CONCURRENCY 1
/* 1 if we can decrease concurrency */
#define CONCURRENCY_CAN_DECREASE 0

#elif defined(st_pthread) || defined(st_old_pthread)
/* pthread.h on Digital UNIX uses c_asm.h, which declares a function
   called `asm.' c_asm.h is used by a program that uses DEC CC's inline
   assembly, so we gcc people can safely ignore it.
   NOTE(review): __C_ASM_H is presumably the include guard of c_asm.h,
   so defining it here makes the header's contents be skipped --
   confirm against the Digital UNIX header. */
#if defined(__osf__)
#define __C_ASM_H
#endif

/* thread attribute and tls fix */
#include <pthread.h>

/* on sun and sgi, concurrency is adjusted explicitly (and is never
   decreased); on other platforms it is not adjusted explicitly */
#if defined(sun) || defined(sgi)
#define EXPLICIT_CONCURRENCY 1
#define CONCURRENCY_CAN_DECREASE 0
#else
#define EXPLICIT_CONCURRENCY 0
/* the warning below is intentionally disabled */
#if 0
#warning "this pthread does not have a way to increase concurrency explicitly"
#endif
#endif

#elif defined(st_sfork_thread)
#include <sys/types.h>
#include <sys/prctl.h>
#define EXPLICIT_CONCURRENCY 0

#elif defined(st_nt_thread)
#include <windows.h>
#define EXPLICIT_CONCURRENCY 0

#elif defined(st_no_thread)
#define EXPLICIT_CONCURRENCY 0

#else
#error "specify thread package"

#endif

/* markers written in front of definitions to document their intended
   visibility. only PRIVATE changes the generated code (it expands to
   `static'); the others expand to nothing. */
#define PRIVATE static
#define GLOBAL			/* things used from other files in runtime */
#define PUBLIC			/* things used by user */
/* NOTE(review): presumably marks things that are referenced only
   through macros -- confirm against its uses */
#define USED_BY_MACRO

/* run-time checks that the configured sizes of basic types match what
   the compiler actually uses. PTR_BYTES, INT_BYTES and LONG_BYTES are
   not defined in this file. since these are built on assert(), they
   check nothing when NDEBUG is defined. */
#define ASSERT_PTR_SIZE() assert(sizeof(void *) == PTR_BYTES)
#define ASSERT_INT_SIZE() assert(sizeof(int) == INT_BYTES)
#define ASSERT_LONG_SIZE() assert(sizeof(long) == LONG_BYTES)

/* perform all three size checks; wrapped in do { } while(0) so that
   the macro can be used like a single statement */
#define ASSERT_DATA_SIZES() do { \
				  ASSERT_PTR_SIZE(); \
				  ASSERT_INT_SIZE(); \
				  ASSERT_LONG_SIZE(); \
				} while(0)

/* some machine-specific constants:

   KEEP_STACK_RETURN_DISPLACEMENT: the displacement of the return
   instruction when we don't free stack. unless a stack frame grows
   dynamically for pushing parameters, it is zero. 

   VOID_RETURN_DISPLACEMENT: the displacement of the return
   instruction for void functions. that is, given R as the return
   address, a void function jumps to R + VOID_RETURN_DISPLACEMENT.
   
   JUMP_AND_LINK_DISPLACEMENT: the displacement of the address linked
   by a call instruction. that is, when a call instruction is placed
   at address R, R + JUMP_AND_LINK_DISPLACEMENT is stored into the link
   register.

   ARGS_ARE_PUSHED: 1 only on i386, 0 on the other CPUs listed here.
   NOTE(review): presumably 1 when arguments are pushed on the stack
   by the caller -- confirm against its uses.

   the CPU is chosen by the first defined macro among i386, mips,
   __sparc and __alpha; any other CPU stops compilation with #error. */

#if defined(i386)
/* #define KEEP_STACK_RETURN_DISPLACEMENT 3 */
#define KEEP_STACK_RETURN_DISPLACEMENT 0
#define VOID_RETURN_DISPLACEMENT 0
#define JUMP_AND_LINK_DISPLACEMENT 5
#define ARGS_ARE_PUSHED 1
#elif defined(mips)
#define KEEP_STACK_RETURN_DISPLACEMENT 0
#define VOID_RETURN_DISPLACEMENT 0
#define JUMP_AND_LINK_DISPLACEMENT 4
#define ARGS_ARE_PUSHED 0
#elif defined(__sparc)
#define KEEP_STACK_RETURN_DISPLACEMENT 0
#define VOID_RETURN_DISPLACEMENT 8
#define JUMP_AND_LINK_DISPLACEMENT 0
#define ARGS_ARE_PUSHED 0
#elif defined(__alpha)
#define KEEP_STACK_RETURN_DISPLACEMENT 0
#define VOID_RETURN_DISPLACEMENT 0
#define JUMP_AND_LINK_DISPLACEMENT 4
#define ARGS_ARE_PUSHED 0
#else
#error "unknown CPU"
#endif

/* when the program calls st_thr_create(f, a), we invoke a thread that
   executes f(a) from within a special wrapper function that sets up
   the pointer to thread local storage and saves/restores callee-save
   registers (because this is an upcall). that is, what is actually
   passed to the thread creation function provided by the system (such
   as Solaris thr_create) is the special wrapper function. the problem
   now is how to pass two arguments, f and a, to the wrapper
   function. we must put them in memory and pass the pointer to the
   region. that memory can be reused only after the launched thread
   has read both f and a. so we pass to the wrapper a pointer to the
   following structure. the wrapper sets STATE to zero once it has
   read f and a. */

/* currently, the first parameter for st_thr_create is a function that
   takes 7 void * parameters (THR_PROC_N_PARAMS == 7). when you change
   the number of parameters of thread procedures, change the definitions
   of thr_proc_t, st_thr_create, and thr_proc_wrapper accordingly. they
   are in th.c.

   also change the definition of worker_proc (in ws.c), which is passed to
   st_thr_create.

   code that needs to change is guarded by #if THR_PROC_N_PARAMS == 7 so
   that a compilation error will result when you make them
   inconsistent.  */

/* type of a thread procedure: a function taking THR_PROC_N_PARAMS
   (currently 7) void * arguments and returning void *.
   THR_PROC_N_PARAMS is not defined in this file. if it has any value
   other than 7, compilation stops here so that this typedef gets
   updated together with it. */
#if (THR_PROC_N_PARAMS == 7)
typedef void * (*thr_proc_t)(void *, void *, void *, void *, void *, void *,
			     void *);

#else 
#error change here accordingly when you change THR_PROC_N_PARAMS
#endif

/* one slot of the region through which a thread procedure and its
   arguments are handed to the wrapper function running in a newly
   created thread */
typedef struct thr_proc_wrapper_arg
{
  /* the thread procedure to run */
  thr_proc_t f;
  /* the THR_PROC_N_PARAMS arguments for F */
  void * a[THR_PROC_N_PARAMS];
  /* set to one by whoever grabs this slot; the wrapper sets it back
     to zero once it has read F and A, after which the slot can be
     reused */
  st_int_loc_t state;
} * thr_proc_wrapper_arg_t;

/* identifier of a thread. it holds exactly one field, whose type is
   the native thread identifier of the selected thread package. */
typedef struct st_thr_id
{
#if defined(st_solaris_thread)
  thread_t solaris_thread_id;
#elif defined(st_pthread) || defined(st_old_pthread)
  pthread_t pthread_id;
#elif defined(st_sfork_thread)	/* SGI's sproc */
  pid_t pid;
#elif defined(st_nt_thread)	/* NT */
  HANDLE nt_thread_handle;
#elif defined(st_no_thread)	/* no threads */
  void * dummy;			/* never used (it just keeps the
				   structure from being empty) */
#else
#error "specify thread package"
#endif
} st_thr_id, * st_thr_id_t;

/* initialization state of a thread attribute. this is the value kept
   in the `state' field of struct st_thr_attribute (which is declared
   there as st_int_loc_t). */
typedef enum thr_attr_init_state
{
  thr_attr_init_state_uninit = 0,	/* uninit */
  thr_attr_init_state_inited = 1	/* inited */
} thr_attr_init_state_t;

/* attributes used when creating threads */
typedef struct st_thr_attribute
{
  st_int_loc_t state;		/* actually of thr_attr_init_state_t */
  long stack_size;
  /* the native attribute objects exist only for the pthread packages.
     they are declared as one-element arrays, so the field name by
     itself is usable as a pointer to the object. */
#if defined(st_pthread) || defined(st_old_pthread)
  pthread_attr_t attr[1];
  pthread_condattr_t condattr[1];
  pthread_mutexattr_t mutexattr[1];
#endif
} * st_thr_attribute_t;

/* we allocate a fixed region of memory for passing arguments to 
   wrappers. we simply scan the region from the beginning and try to
   grab the lock of an element (by setting its STATE to one).
   N_THR_PROC_WRAPPER_ARGS is the number of elements in that region. */
#define N_THR_PROC_WRAPPER_ARGS 64

/* states that the profiler distinguishes.

   when you add a new state:
   (1) add prof_state_xxx to this enum,
   (2) keep the array prof_state_string[] in prof.c consistent,
   (3) define st_prof_xxx in prof.c (see the bottom of prof.c),
   (4) declare it in this file (see the bottom of this file).

   prof_state_last must remain the final enumerator: it is used as
   the length of the per-state arrays in struct profile. */
typedef enum prof_state
{
  prof_state_start = 0,     /* start */
  prof_state_setup_worker,  /* worker initialization */
  prof_state_busy,          /* busy */
  prof_state_sleep,         /* sleep */
  prof_state_idle,          /* idle */
  prof_state_switch,        /* context switch */
  prof_state_serv_steal,    /* serve steal request */
  prof_state_serv_msg,      /* in msg handler */
  prof_state_steal,         /* try to steal work */
  prof_state_msg,           /* send msg */
  prof_state_delete_frame,  /* delete frame */
  prof_state_spin,          /* spin */
  prof_state_exiting,       /* exiting */
  prof_state_end,           /* a distinguished state to indicate the
                               end of a profile */
  prof_state_last           /* number of states above; not a state */
} prof_state_t;

/* translate a value of type prof_state_t into a string.
   the array is only declared here; it is defined in prof.c and must
   be kept consistent with enum prof_state. */
extern char * prof_state_string[];

/* one recorded interval of a profile: the state of the interval and
   the time at which it began */
typedef struct prof_datum
{
  prof_state_t state;
  int begin_time;		/* us */
} * prof_datum_t;

/* profiling record. this must be a thread local datum */
typedef struct profile
{
  /* time at which prof_change_state was called last time */
  long last_prof_time;		
  /* the state that began at last_prof_time */
  prof_state_t current_state;
  /* histogram of this interval. THIS_INTERVAL_PROF[X] refers to 
     the microseconds this processor spends in state X in this interval */
  long this_interval_prof[prof_state_last];
  /* histogram of total time spent in each state */
  long total_time_prof[prof_state_last];
  /* the number of intervals recorded */
  long n_intervals;
  /* size of the in-memory profile buffer */
  long max_intervals;
  /* the length of an interval in microseconds */
  long prof_resolution;
  /* filename */
  char * filename_prefix;
  /* local filename counter */
  long n_files;
  /* pointer to the in-memory record (whose length is max_intervals) */
  prof_datum_t intervals;
} * profile_t;
/* invariant: 
   1 <= n_intervals < max_intervals
   intervals[n_intervals - 1] holds the definite state of the last interval.
   (i.e., 
   intervals[n_intervals - 1].state and 
   intervals[n_intervals - 1].begin_time no longer change).
   at the very first interval, we set intervals[0].state = prof_state_start
   that is distinguishable from any other state.
   
   intervals[n_intervals].state has not been written (not yet fixed).
   intervals[n_intervals].begin_time tentatively holds the time at which
   intervals[n_intervals - 1].state has been determined, but may be changed
   in future.
   this happens when the current interval turns out to be the same state
   as intervals[n_intervals - 1].state, in which case the current interval
   is merged into intervals[n_intervals - 1], and 
   intervals[n_intervals].begin_time is the beginning time of the new interval.
*/


/* worker group data structure.
   a worker group represents a number of workers.

   a worker group is created by giving the entry procedure
   (create_worker). the worker 0 in the group begins the entry
   procedure, while other workers are initially idle.
   workers in a group share threads created in the group.

   all workers in a group die when the entry procedure returns.
   the return value is written to RETURN_VALUE.
   */

/* whether profiling is going on. this is the value kept in the
   `profiling' field of struct worker_group (which is declared there
   as st_int_loc_t). */
typedef enum profiling_state
{
  /* no profiling */
  profiling_stopping = 0,
  /* doing */
  profiling_doing = 1,
  /* ending (workers are still recording) */
  profiling_ending = 2
} profiling_state_t;

/* synchronization variable operated on by the st_oneshot_sync_var_*
   functions (init / wait / signal / signaled) declared in this file.
   the fields used for the actual synchronization depend on the
   selected thread package. */
typedef struct st_oneshot_sync_var
{
  /* fields for error checking (not for synchronization) */
  st_int_loc_t waited;		/* 1 if somebody called `wait' */
  st_int_loc_t signaled;	/* 1 if somebody called `signal' */
#if defined(st_solaris_thread)
  mutex_t m[1];
  cond_t c[1];
#elif defined(st_pthread) || defined(st_old_pthread)
  pthread_mutex_t m[1];
  pthread_cond_t c[1];
#elif defined(st_sfork_thread) 
  /* the process ID of the waiting process (or 0 if none) */
  /* int pid */
  st_long_loc_t pid;
#elif defined(st_nt_thread)
  HANDLE h;			/* semaphore object */
#elif defined(st_no_thread)
  /* when we use no thread package, done = 1 when we are done */
  int done;
#else 
#error "specify thread package"
#endif
} * st_oneshot_sync_var_t;

/* data structure to fix an invalid tls pointer.
   its content depends on the selected thread package; note that for
   st_sfork_thread the structure has no member at all. */
typedef struct st_tls_fix
{
#if defined(st_solaris_thread)
  /* when sthreads is running on top of Solaris threads, we make a key
     and register a thread's tls pointer using the key. */
  thread_key_t key;
#elif defined(st_pthread) || defined(st_old_pthread)
  /* the same as the Solaris threads case */
  pthread_key_t key;
#elif defined(st_sfork_thread)
  /* nothing */
#elif defined(st_nt_thread)
  /* NOTE(review): presumably a Win32 TLS index playing the same role
     as the keys above -- confirm in th.c */
  DWORD key;
#elif defined(st_no_thread)
  /* we simply use a global variable that holds the tls */
  thread_local_storage_t the_tls;
#else
#error "specify thread package"
#endif
} * st_tls_fix_t;

/* status of a worker group. this is the value kept in the `status'
   field of struct worker_group (which is declared there as
   st_int_loc_t). */
typedef enum wg_status
{
  wg_status_locked  = -1,
  wg_status_running =  0,
  wg_status_exited  =  1
} wg_status_t;

/* state shared by all the workers that belong to one worker group */
typedef struct worker_group 
{
  /* seed of worker id. when a worker is created, the creator first increments
     this. when the created worker initializes itself, it adds itself to
     the list of WORKERS below. therefore, worker_id_seed[0] may be larger 
     than the number of elements in workers. in particular, when the OS takes
     a long time to launch a worker, it may be significantly larger. */
  st_long_loc_t worker_id_seed[1];
  /* list of workers in this group.
     0 when uninitialized.
     -1 when locked */
  workers_list_t workers;
  /* keep track of the length of workers. the role is similar to 
     worker_id_seed, but N_WORKERS is incremented AFTER workers got
     updated. therefore, this may be smaller than the length of WORKERS,
     but the difference is usually small */
  st_int_loc_t n_workers;
  /* 1 if the entry function returned */
  st_int_loc_t status;		/* actually wg_status_t */
  /* synchronization variable that signals an external waiter */
  struct st_oneshot_sync_var done_var[1];
  /* return value of main */
  void * return_value;
  /* 1 if the entry function normally returned.
     0 if the group exited by an explicit call to st_wg_exit */
  int entry_returned;
  /* number of workers that noticed we are done */
  st_int_loc_t n_quittings;
  /* configuration */
  struct worker_group_conf conf;
  /* actually of type profiling_state, but declared as st_int_loc_t 
     because it is locked */
  st_int_loc_t profiling;		
} worker_group;

/* runtime data structure that is shared by all threads.
   every field name begins with an underscore; fields are meant to be
   reached through the tss(x) macro, which supplies the underscore. */

typedef struct thread_shared_storage
{
  /* procedure information table */
  st_proc_info_t *_proc_info_table;
  /* the lowest address registered in the table */
  uslong _proc_info_lo;
  /* next address of the highest address registered in the table */
  uslong _proc_info_hi;
  /* maximum/minimum of SP-relative region */
  uslong _max_sp_relative_size;
  uslong _min_sp_relative_size;
  /* maximum of SP shrink size by a single instruction */
  long _max_sp_shrink_size;

  /* data information table (array of <address, name> tuples) */
  st_data_info_t _data_info_table;
  /* number of entries in data info table */
  uslong _n_data_info_table;

  /* tls storage for the main thread */
  struct thread_local_storage _main_tls_storage[1];
  /* save invalid tls register during global constructor execution */
  thread_local_storage_t _save_invalid_tls_global_ctor;
  /* the following two fields exist only when the thread package
     needs the concurrency level to be adjusted explicitly */
#if EXPLICIT_CONCURRENCY
  /* increase/decrease of concurrency level that has not yet been 
     reflected to the desirable concurrency understood by the system */
  st_long_loc_t _unreflected_concurrency;
  struct st_ticket_lock _concurrency_lock[1];
#endif
  /* the counter for allocating thread id */
  st_long_loc_t _thread_id_seed[1];
  /* region via which arguments to newly created threads are passed */
  struct thr_proc_wrapper_arg 
    _thr_proc_wrapper_args[N_THR_PROC_WRAPPER_ARGS];
  /* thread attributes */
  struct st_thr_attribute _st_thr_attr[1];
  /* a key to fix invalid tls */
  struct st_tls_fix _tls_fix[1];
  /* global options */
  struct st_global_options _global_options[1];
} * thread_shared_storage_t;

/* the single instance of the thread shared storage. it is only
   declared here; the definition lives in another file. */
extern struct thread_shared_storage __tss;

/* access a field of the thread shared storage: tss(x) expands to
   __tss._x, e.g. tss(thread_id_seed) is __tss._thread_id_seed */
#define tss(x) __tss._##x

/* prototypes */

/* st.c */

/* NOTE(review): presumably the int argument is a size in bytes and
   st_free releases what st_malloc returned -- confirm in st.c */
char * st_malloc(int);
void st_free(char *);
/* st_get_proc_info(pc, zinf, pi) is simply another name for
   st_get_proc_info_full with the same three arguments */
st_proc_info_t st_get_proc_info_full(void *, int, st_proc_info_t);
#define st_get_proc_info(pc, zinf, pi) st_get_proc_info_full(pc, zinf, pi)
int st_is_fork_point(void *, st_proc_info_t);
int st_is_invalid_call_site(void *, st_proc_info_t);
/* when ST_DBG is non-zero, st_export_frame takes two extra trailing
   parameters (a void ** and a char *) */
void st_export_frame(void *, void **, long
#if ST_DBG
		     , void **, char *
#endif /* ST_DBG */
);
void st_init_tls(thread_local_storage_t);
void st_schedule_resumed_context(st_context_t, invalid_frame_desc_t);
int st_atoflag(char *);

/* `_sys' variants that pass 0 as the last argument of the underlying
   function. st_respond_to_worker_msg, st_remove_exported_frames_aux
   and asm_get_fp are not declared in this file. */
#define st_respond_to_worker_msg_sys(s) st_respond_to_worker_msg(s, 0)
#define st_remove_exported_frames_sys() st_remove_exported_frames_aux(asm_get_fp(), 0)


/* ws.c */
/* the `_1' functions take the worker group to operate on as their
   first argument */
void init_worker_group(worker_group_t, worker_group_conf_t);
workers_list_t st_current_workers_1(worker_group_t);
int st_n_current_workers_1(worker_group_t);
void create_slave_worker_1(worker_group_t);
/* the entry procedure takes WORKER_PROC_N_PARAMS (currently 4)
   arguments, a0 ... a3. if WORKER_PROC_N_PARAMS has any other value,
   compilation stops here so that this prototype gets updated
   together with it. */
void * become_worker_group(worker_group_conf_t,	/* configuration */
			   int, /* number of initial workers */
			   worker_proc_t f, /* entry procedure */
#if (WORKER_PROC_N_PARAMS == 4)
			   void * a0, void * a1, 
			   void * a2, void * a3
#else
#error change here accordingly when you change WORKER_PROC_N_PARAMS
#endif
			   );

int st_determine_wg_return_value(worker_group_t, void *);

/* th.c */
void set_proc_stack_size(long);
long parse_stack_size(char *);
/* wrapper functions that receive a pointer to the structure holding
   the thread procedure and its arguments */
void st_thr_proc_wrapper1(thr_proc_wrapper_arg_t);
void st_thr_proc_wrapper(thr_proc_wrapper_arg_t);
/* create a thread. the arguments are the thread procedure, a thread
   id structure, and the THR_PROC_N_PARAMS (currently 7) void *
   arguments for the procedure. if THR_PROC_N_PARAMS has any other
   value, compilation stops here so that this prototype gets updated
   together with it.
   NOTE(review): presumably the id of the new thread is stored through
   the st_thr_id_t argument -- confirm in th.c */
void st_thr_create(thr_proc_t, st_thr_id_t, 
#if (THR_PROC_N_PARAMS == 7)
		   void *, void *, void *, void *, void *, void *, void *
#else
#error change here accordingly when you change THR_PROC_N_PARAMS
#endif
);
int st_thr_join(st_thr_id_t);
void make_tls_key(void);
void st_set_tls(thread_local_storage_t);
/* operations on struct st_oneshot_sync_var */
void st_oneshot_sync_var_init(st_oneshot_sync_var_t);
void st_oneshot_sync_var_wait(st_oneshot_sync_var_t);
void st_oneshot_sync_var_signal(st_oneshot_sync_var_t);
int st_oneshot_sync_var_signaled(st_oneshot_sync_var_t);

/* st_nocs.c */
void st_unwind(int, st_context_t);

/* os.c */
/* NOTE(review): the meaning of the two char * arguments and of the
   unsigned int is not visible from this header -- see os.c */
void st_create_binary_file(char *, char *, unsigned int);

/* ma.c */
void init_proc_stack_size(void);

/* prof.c */

/* one st_prof_xxx function for each prof_state_xxx, except for
   prof_state_start, prof_state_end and prof_state_last, which have
   none. when you add a state to enum prof_state, declare its
   st_prof_xxx function here.
   NOTE(review): presumably each function switches the calling
   thread's profile to the corresponding state -- confirm in prof.c */
void st_prof_setup_worker(void);
void st_prof_busy(void);
void st_prof_sleep(void);
void st_prof_idle(void);
void st_prof_switch(void);
void st_prof_serv_steal(void);
void st_prof_serv_msg(void);
void st_prof_steal(void);
void st_prof_msg(void);
void st_prof_delete_frame(void);
void st_prof_spin(void);
void st_prof_exiting(void);

/* profiling control for a given worker group */
void st_config_profile_1(worker_group_t, st_prof_conf_t);
void st_begin_profile_1(worker_group_t);
void st_end_profile_1(worker_group_t);

/* low-level primitives.
   NOTE(review): judging from the asm_ prefix these are presumably
   implemented in assembly; their exact contracts are not visible
   from this header. */
void * asm_extend_stack(long);
void asm_set_fp_sp_and_jmp(void *, void *, void *);
void asm_set_fp_and_jmp_link(void *, void *, void **, long);
int asm_capture_context(st_context_t);
int asm_install_context(st_context_t);
void asm_save_callee_saves(uslong *);
void asm_restore_callee_saves(uslong *);

/* declared only when the corresponding HAVE_ASM_READ_AND_SET_LSB*
   macro is non-zero */
#if HAVE_ASM_READ_AND_SET_LSB8
long asm_read_and_set_lsb8(void *);
#endif
#if HAVE_ASM_READ_AND_SET_LSB4
long asm_read_and_set_lsb4(void *);
#endif

#endif /* ifndef __ST_INT_H__ */
