/*
  $Header: /home/sabi/pcg/Cmd/Commands/team.c,v 3.11 2005/10/11 19:17:57 pcg Exp pcg $

  Compile with: cc -Wall -s -O2 team.c -o team

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#define DEBUG			0

#ifndef __STDC__
# error this version of team needs ANSI C
#endif

#if (defined __GNUC__ || defined _MSC_VER)
# ifndef TeamLONGLONG
#   define TeamLONGLONG		1
# endif
#endif

static const char Notice[] =
  "Copyright 1987,1989,2002 PeterG <pg_freesw {at} freesw.for.sabi.co.UK>\n"
  "team version $Revision: 3.11 $ $Date: 2005/10/11 19:17:57 $\n";

/*
  External components...  Probably the only system dependent part
  of this program, as some systems have something in
  /usr/include/sys where others have it in /usr/include.

  Also, the mesg() procedure is highly system dependent...  watch
  out for locking and variable number of arguments.
*/

#ifdef linux
# ifndef _XOPEN_SOURCE
#   define _XOPEN_SOURCE        600
# endif
#endif

#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#ifdef linux
# include <linux/fadvise.h>
# include <linux/fcntl.h>
#else
# include <fcntl.h>
#endif

#ifdef sun
# undef F_SETLKW
#endif

#ifndef LOCKTTY
# define LOCKTTY	        1
#endif

/*
  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 1, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You may have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
  Unix programs normally do synchronous read and write, that is,
  you read and then you write; no overlap is possible.

  This is especially catastrophic for device to device copies,
  where it is important to minimize elapsed time, by overlapping
  activity on one with activity on another.

  To obtain this, a multiprocess structure is necessary under
  Unix.  This program is functionally equivalent to a pipe, in
  that it copies its input (fd 0) to its output (fd 1) link.

  This program is executed as a Team of N processes, called
  Mates, all of which share the same input and output links; the
  first reads a chunk of input, awakens the second, writes the
  chunk to its output; the second does the same, and the last
  awakens the first.

  Since this process is essentially cyclic, we use a ring of
  pipes to synchronize the Mates.  Each mate has an input pipe from
  the upstream mate and an output pipe to the downstream mate.
  Whenever a mate receives a READ command from the upstream, it
  first reads a block and then passes on the READ command
  downstream; it then waits for a WRITE command from upstream,
  and then writes the block and after that passes the WRITE
  command downstream. A count of how much has been processed is
  also passed along, for statistics and verification.

  Two other commands are used, one is STOP, and is sent
  downstream from the mate that detects the end of file of the
  input, after which the mate exits, and ABORT, which is sent
  downstream from the mate which detects trouble in the mate
  upstream to it, which has much the same effect.
*/

/*
  Default and high number of processes
*/
/* Number of mates (processes) used when none is given on the command line. */
#define TeamDTEAMCNT		(3)
/* Highest number of mates main() accepts on the command line. */
#define TeamHTEAMCNT		(16)

/*
  Log2 of the granule: sizes are shifted by (10-TeamGRANULE) or
  (20-TeamGRANULE) to print KB or MB, so with 0 a granule is one byte.
*/
#define TeamGRANULE		(0)
/* Volume size that main() exempts from its low/high range check. */
#define TeamVOLNOLIMIT		(0)

/*
  'offset' is the unsigned type used for volume sizes and running byte
  counts; '_LU' is the printf length and conversion that matches it.
  TeamLVOL_GRANULES and TeamHVOL_GRANULES are the lowest and highest
  volume sizes main() accepts; the highest is also the default.
*/
#if (TeamLONGLONG)
  typedef long long unsigned	offset;
# define _LU			"llu"
# define TeamLVOL_GRANULES 	((offset) 1)
# define TeamHVOL_GRANULES 	(((offset) 1) << (41-TeamGRANULE))
#else
  typedef long unsigned		offset; 	 
# define _LU			"lu"
# define TeamLVOL_GRANULES 	((offset) 1)
# define TeamHVOL_GRANULES 	(((offset) 1)<<(31-TeamGRANULE))
#endif

/*
  Low, default and high buffer sizes
*/
/* One kilobyte expressed in granules, as an 'address' value. */
#define Team1KB_GRANULES	(((address) 1) << (10-TeamGRANULE))

/* Smallest buffer main() accepts: 1KB. */
#define TeamLBUF_BYTES		(1*Team1KB_GRANULES)
/* Buffer size used when none is given on the command line: 32KB. */
#define TeamDBUF_BYTES		(32*Team1KB_GRANULES)
/* Largest buffer main() accepts: 65536KB. */
#define TeamHBUF_BYTES		((1<<16)*Team1KB_GRANULES)

/*
  Some shorthands. Uhmm, could do without.
*/

/*
  mode(struct,Fd) expands to 'typedef struct Fd Fd; struct Fd', so the
  brace body that follows it defines the struct while the bare tag
  name is already usable as a type name.
*/
#define mode(which,name) typedef which name name; which name

/* 'scalar' and 'bool' are both plain 'unsigned'; true/false are 1/0. */
#define scalar		unsigned
#define bool		unsigned
#define true		(1)
#define false		(0)

/* 'address' holds buffer sizes and offsets within a buffer. */
typedef long unsigned	address;
/* 'pointer' is a byte pointer into a transfer buffer. */
typedef char	    	*pointer;
/* nil(type) is the zero value cast to 'type', used as a null pointer. */
#define nil(type)	((type) 0)

/*
  Trace macro.  Call it with doubled parentheses, Mesg(("fmt",args));
  it expands to a mesg() call when DEBUG is non-zero and to nothing
  otherwise.
*/
#if (!DEBUG)
# define Mesg(list)
#else
# define Mesg(list)	mesg list
#endif

/*VARARGS1*/
/*
  Print a printf-style message on stderr.

  'f' is the format, followed by the arguments it consumes.  When
  LOCKTTY is non-zero the output is bracketed by a lock/unlock pair on
  the stderr file descriptor: flock() if LOCK_EX is defined, otherwise
  an fcntl() record lock if F_SETLKW is defined, otherwise nothing.
  The results of the locking calls are not checked.
*/
static void mesg(const char *const f,...)
{
# if (LOCKTTY)
#   if (defined LOCK_EX)
      /* LOCK_NB: the lock is only attempted, this call does not wait. */
      flock(fileno(stderr),LOCK_EX|LOCK_NB);
#   else
#     if (defined F_SETLKW)
	/*
	  Write lock starting at 0 with length 0; presumably this
	  covers the whole file (l_whence 0 being SEEK_SET) -- confirm
	  against the local fcntl documentation.
	*/
	struct flock l;
	l.l_whence = 0; l.l_start = 0L; l.l_len = 0L;
	l.l_type = F_WRLCK; fcntl(fileno(stderr),F_SETLKW,&l);
#     endif
#   endif
# endif

  {
    va_list ap;

    va_start(ap,f);

    vfprintf(stderr,f,ap);

    va_end(ap);

  }
# if (LOCKTTY)
#   if (defined LOCK_EX)
      flock(fileno(stderr),LOCK_UN);
#   else
#     if (defined F_SETLKW)
	/* Reuses the 'l' set up above, only the lock type changes. */
	l.l_type = F_UNLCK; fcntl(fileno(stderr),F_SETLKW,&l);
#     endif
#   endif
# endif
}

/* When true, FdRetry() marks the link as at EOF instead of prompting. */
static bool		nocontinue = false;
/* When true, MateStart() prints a running total after each block. */
static bool		verbose = false;
/* When true, MateStop() prints the final size, time and rate summary. */
static bool		report = true;
/* Start time recorded by main(); MateStop() subtracts it from "now". */
static time_t		origin;

/* Page size cached by mallocpaged(); 0 means not yet obtained. */
static size_t		pagesize = 0L;


/*
  If we can get the page size, and we have O_DIRECT, we should allocate
  buffers with an address which is page-aligned, in the forlorn hope that
  this might help.

  This is a classic bit of code to allocate larger blocks, return an
  aligned address inside the block, with the previous word being the
  address of the enclosing block.
*/
#if !(defined _SC_PAGESIZE && defined O_DIRECT)
  /*
    Without a page size query or without O_DIRECT there is nothing to
    gain from alignment: these are plain wrappers around malloc/free.
  */
  static void *mallocpaged(const size_t n)
  {
    return malloc(n);
  }

  static void freepaged(void *const b)
  {
    free(b);
  }
#else
  /*
    Alignment for paged blocks: the system page size, obtained once and
    cached in 'pagesize'.  sysconf() returns -1 on failure, which must
    never be stored as a size; in that case blocks are aligned to the
    size of the header word instead, so that every block returned by
    mallocpaged() has the same layout and freepaged() always works.
  */
  static size_t pagedalignment(void)
  {
    if (pagesize == 0)
    {
      const long probed = sysconf(_SC_PAGESIZE);

      if (probed > 0)
	pagesize = (size_t) probed;
    }

    return (pagesize != 0) ? pagesize : sizeof (long unsigned);
  }

  /*
    Allocate 'n' bytes at an aligned address.  A larger block is
    obtained from malloc(); the address returned lies inside it and the
    word just before that address holds the address of the enclosing
    block.  Returns a null pointer if the size overflows or malloc()
    fails.  Release the result with freepaged() only.
  */
  static void *mallocpaged(const size_t n)
  {
    const size_t align = pagedalignment();
    const size_t overhead = align + sizeof (long unsigned);

    if (n > (size_t) -1 - overhead)
      return 0;

    {
      void *const b = malloc(overhead + n);

      if (b == 0)
	return b;

      {
	/* Leave room for the header word, then round up to 'align'. */
	long unsigned a = (long unsigned) b + sizeof (long unsigned);

	a = ((a + align - 1) / align) * align;

	((long unsigned *) a)[-1] = (long unsigned) b;

	return (void *) a;
      }
    }
  }

  /*
    Release a block obtained from mallocpaged(); a null pointer is
    ignored.  The enclosing malloc() block is found in the header word.
  */
  static void freepaged(void *const b)
  {
    if (b == 0)
      return;

    free((void *) ((long unsigned *) b)[-1]);
  }
#endif

/*
  The  regular Unix read and write calls are not guaranteed to process
  all  the  bytes  requested.  These  procedures guarantee that if the
  request is for N bytes, all of them are read or written unless there
  is an error or eof.
*/

/* Values of Fd.status. */
#define FdCLOSED    	0	/* no usable descriptor */
#define FdOPEN	    	1	/* descriptor usable for transfers */
#define FdEOF 	    	2	/* end of data reached, FdRead/FdWrite return 0 */
#define FdERROR     	3	/* a transfer failed, FdRead/FdWrite return -1 */

/*
  A file descriptor together with its state and the size of the volume
  behind it.
*/
mode(struct,Fd)
{
  int			  fd;		/* descriptor number, -1 once closed */
  short			  status;	/* one of the Fd* values above */
  offset		  volbytes;	/* volume size used to compute space left */
};

/* The shared input and output links; TeamStart() opens them on 0 and 1. */
static Fd		FdIn,FdOut;

/*
  Initialise 'fd' to describe descriptor 'ffd' on a volume of
  'volbytes'.  A negative 'ffd' leaves the Fd marked as closed.
  Returns true exactly when 'ffd' is non-negative.
*/
static bool		FdOpen
(
  register Fd		  *const fd,
  const int		  ffd,
  const offset		  volbytes
)
{
  const bool		  usable = (ffd >= 0);

  fd->volbytes	= volbytes;
  fd->fd	= ffd;

  if (usable)
    fd->status = FdOPEN;
  else
    fd->status = FdCLOSED;

  Mesg(("FdOpen fd %d\n",ffd));

  return usable;
}

static bool		FdClose
(
  register Fd	  	*const fd
)
{
  const int 		  ffd = fd->fd;

  Mesg(("FdClose fd %d\n",fd->fd));

  fd->fd	= -1;
  fd->status	= FdCLOSED;

  return close(ffd) >= 0;
}

static bool		FdCopy
(
  register Fd		  *const to,
  register const Fd	  *const from
)
{
  to->fd	= dup(from->fd);
  to->status	= from->status;
  to->volbytes	= from->volbytes;

  Mesg(("FdCopy of %d is %d\n",from->fd,to->fd));

  return to->fd >= 0;
}

static void		FdSet
(
  register Fd		  *const to,
  register const Fd	  *const from
)
{
  if (from->fd < 0)
    mesg("team: set an invalid fd\n");

  to->fd	= from->fd;
  to->status	= from->status;
  to->volbytes	= from->volbytes;
}

static offset		FdRetry
(
  register Fd		  *const fd,
  char			  *const which,
  const offset		  donebytes,
  const offset		  space
)
{
  if (nocontinue)
  {
    fd->status = FdEOF;
    return 0;
  }

  {
    struct stat		  st;

    if (fstat(fd->fd,&st) < 0)
    {
      perror(which);
      return 0;
    }

    st.st_mode &= S_IFMT;
    if (st.st_mode != S_IFCHR && st.st_mode != S_IFBLK)
    {
      fd->status = FdEOF;
      return 0;
    }
  }

  if (!isatty(fileno(stderr)))
  {
    fd->status = FdEOF;
    return 0;
  }


  {
    int			  tty;
    char		  reply[2];

    if ((tty = open("/dev/tty",0)) < 0)
    {
      perror("/dev/tty");
      return 0;
    }

    do
    {
#     if (defined _POSIX_SOURCE)
	const char		*const errmsg = strerror(errno);
#     else
#       if (defined i386 || defined sun)
	  extern const char	*const (sys_errlist[]);
	  const char		*const errmsg = sys_errlist[errno];
#       else
	  char			errmsg[32];
	  (void) sprintf(errmsg,"Error %d",errno);
#       endif
#     endif

      if (errno)
	mesg("'%s' on %s after %"_LU"KB. Continue [cyn] ? ",
	     errmsg,which,donebytes>>(10-TeamGRANULE));
      else
	mesg("EOF on %s after %"_LU"KB. Continue [cyn] ? ",
	     which,donebytes>>(10-TeamGRANULE));

      (void) read(tty,reply,sizeof reply);
    }
    while (strchr("cCyYnN",reply[0]) == 0);

    (void) close(tty);

    if (strchr("nN",reply[0]) != 0)
    {
      fd->status = FdEOF;
      return 0;
    }

    errno = 0;

    if (strchr("cC",reply[0]) != 0)
    {
      (void) lseek(fd->fd,0L,0);
      return fd->volbytes;
    }
  }

  return space;
}

/*
  Issue one read() for the part of 'buffer' between offsets
  'donebytes' and 'dobytes'.

  Returns what read() returned: the number of bytes read, 0 at end of
  file, or a negative value on error.  An empty or inverted range
  returns 0 without calling read(), because 'dobytes-donebytes' is
  unsigned and would otherwise wrap to an enormous length.
*/
static int		FdDoRead
(
  const int		  fd,
  const pointer		  buffer,
  register address	  donebytes,
  register address	  dobytes
)
{
  if (dobytes <= donebytes)
    return 0;

  return (int) read(fd,buffer+donebytes,dobytes-donebytes);
}

/*
  Issue one write() for the part of 'buffer' between offsets
  'donebytes' and 'dobytes'.

  Returns what write() returned: the number of bytes written, or a
  negative value on error.  An empty or inverted range returns 0
  without calling write(), because 'dobytes-donebytes' is unsigned and
  would otherwise wrap to an enormous length.
*/
static int		FdDoWrite
(
  const int		  fd,
  const pointer		  buffer,
  register address	  donebytes,
  register address	  dobytes
)
{
  if (dobytes <= donebytes)
    return 0;

  return (int) write(fd,buffer+donebytes,dobytes-donebytes);
}

static address		FdRead
(
  register Fd		  *const fd,
  const pointer		  buffer,
  register const address  todobytes,
  const offset		  donebytes
)
{
  switch (fd->status)
  {
  case FdEOF:     return 0;
  case FdERROR:   return -1;
  case FdCLOSED:  return -1;

  case FdOPEN:
    {
      register int	  readbytes = 0;
      register address	  prevbytes;
      register offset	  spacebytes;

      spacebytes = fd->volbytes - donebytes%fd->volbytes;

      for (prevbytes = 0; spacebytes != 0L && prevbytes < todobytes;)
      {
        const address maxreadbytes = (todobytes <= spacebytes)
          ? todobytes : (address) spacebytes;

        readbytes = FdDoRead(fd->fd,buffer,prevbytes,maxreadbytes);

        Mesg(("FdRead readbytes %d prevbytes %d"
          " todobytes %d spacebytes %"_LU"\n",
          readbytes,prevbytes,todobytes,spacebytes));

        if (readbytes <= 0 || (prevbytes += readbytes) == spacebytes)
          spacebytes = FdRetry(fd,"input",
            donebytes+prevbytes,spacebytes-prevbytes);
      }

      if (readbytes == 0)
        fd->status = FdEOF;
      else if (readbytes < 0)
        fd->status = FdERROR;

      Mesg(("FdRead %d read %dB last %dB\n",fd->fd,prevbytes,readbytes));

      return (prevbytes == 0) ? readbytes : prevbytes;
    }

  default:
    mesg("team: impossible status %d in 'FdRead'\n",fd->status);
    exit(1);
  }

  /*NOTREACHED*/
}

static address		FdWrite
(
  register Fd		  *const fd,
  const pointer		  buffer,
  register const address  todobytes,
  const offset		  donebytes
)
{
  switch (fd->status)
  {
  case FdEOF:     return 0;
  case FdERROR:   return -1;
  case FdCLOSED:  return -1;

  case FdOPEN:
    {
      register int	  writtenbytes = 0;
      register offset	  spacebytes;
      register address	  prevbytes;

      spacebytes = fd->volbytes - donebytes%fd->volbytes;

      for (prevbytes = 0; spacebytes != 0L && prevbytes < todobytes;)
      {
        const address maxwritebytes = (todobytes <= spacebytes)
          ? todobytes : (address) spacebytes;

        writtenbytes = FdDoWrite(fd->fd,buffer,prevbytes,maxwritebytes);

        Mesg(("FdWrite writtenbytes %d prevbytes %d"
          " todobytes %d spacebytes %"_LU"\n",
          writtenbytes,prevbytes,todobytes,spacebytes));

        if (writtenbytes <= 0 || (prevbytes += writtenbytes) == spacebytes)
          spacebytes = FdRetry(fd,"output",
            donebytes+prevbytes,spacebytes-prevbytes);
      }

      Mesg(("FdWrite %d writes %dB last %dB\n",fd->fd,prevbytes,writtenbytes));

      if (writtenbytes == 0)
        fd->status =   FdEOF;
      else if (writtenbytes < 0)
        fd->status =   FdERROR;

      return (prevbytes == 0) ? writtenbytes : prevbytes;
    }

  default:
    mesg("team: impossible status %d in 'FdWrite'\n",fd->status);
    exit(1);
  }

  /*NOTREACHED*/
}

/*
  A Token is a scalar value representing a command.
*/

/*
  Token is an unsigned short ('short scalar' in this file's shorthand).
  A Token is promoted to int when it is switched on or compared, so
  TokenABORT must be the value -1 takes once stored in a Token; a
  plain (-1) would never compare equal to a received Token.
*/
typedef short unsigned	Token;

#define TokenREAD	(0)
#define TokenWRITE	(1)
#define TokenSTOP	(2)
#define TokenABORT	((Token) -1)

/*
  Here we represent Streams as Fds; this is not entirely appropriate,
  as Fds also have a volume bytes size, and relatively high overhead
  write and read functions.  Well, we just take some liberties with
  abstraction levels here.  Actually we should have an Fd abstraction
  for stream pipes and a Vol abstraction for input and output...
*/

/*
  Create a pipe and open its write end as 'downstream' and its read
  end as 'upstream'.  Returns false, after reporting with perror(),
  when the pipe cannot be created; otherwise the combined result of
  opening the two Fds.
*/
static bool		StreamPipe
(
  register Fd		  *const downstream,
  register Fd		  *const upstream
)
{
  int			  ends[2];

  if (pipe(ends) < 0)
  {
    perror("team: opening links");
    return false;
  }

  Mesg(("StreamPipe fd downstream %d upstream %d\n",ends[1],ends[0]));

  /* The read end is only opened once the write end has been. */
  if (!FdOpen(downstream,ends[1],TeamHVOL_GRANULES))
    return false;

  return FdOpen(upstream,ends[0],TeamHVOL_GRANULES);
}

/*
  The fixed-size record that travels round the ring of pipes;
  StreamSend() writes one and StreamReceive() reads one.
*/
mode(struct,StreamMsg)
{
  Token			  token;	/* the command: one of the Token* values */
  short			  status;	/* Fd status of the link the command is about */
  offset		  donebytes;	/* running count of bytes processed */
};

static bool		StreamSend
(
  register const Fd	  *const fd,
  const Token 		  token,
  const short 		  status,
  const offset		  donebytes
)
{
  StreamMsg		  message;
  register int		  n;

  message.token = token;
  message.status = status;
  message.donebytes = donebytes;

  n = write(fd->fd,(pointer) &message,sizeof message);

  Mesg(("StreamSend fd %u n %d token %d\n",fd->fd,n,token));

  return n == sizeof message;
}

/*
  Read one message from the pipe behind 'fd'.

  On success stores its fields through 'tokenp', 'statusp' and 'donep'
  and returns true.  Returns false when the read fails or delivers
  less than a whole message; the three outputs are then left
  untouched, since the local message was never filled in.
*/
static bool		StreamReceive
(
  register const Fd	  *const fd,
  Token 		  *const tokenp,
  short 		  *const statusp,
  offset		  *const donep
)
{
  register int		  n;
  StreamMsg		  message;

  n = read(fd->fd,(pointer) &message,sizeof message);

  if (n < 0 || (size_t) n != sizeof message)
  {
    Mesg(("StreamReceive fd %u n %d incomplete\n",fd->fd,n));
    return false;
  }

  *tokenp = message.token;
  *statusp = message.status;
  *donep = message.donebytes;

  Mesg(("StreamReceive fd %u n %d token %d\n",fd->fd,n,*tokenp));

  return true;
}
/*
  A mate is an instance of the input to output copier. It is attached
  to a relay station, with an upstream link, from which commands
  arrive, and a downward link, to which they are relayed once they are
  executed.
*/

/* One process of the team, with its two links into the ring. */
mode(struct,Mate)
{
  int			  pid;		/* process id; TeamWait() sets it to -1 once reaped */
  Fd			  upStream;	/* link on which commands are received */
  Fd			  downStream;	/* link on which commands are passed on */
};

/*
  Fill in 'mate' with its process id and its two ring links.  The
  links are aliased with FdSet(), not duplicated.  Always returns
  true.
*/
static bool		MateOpen
(
  register Mate		  *const mate,
  const int		  pid,
  Fd			  *const upstream,
  Fd			  *const downstream
)
{
  Fd			  *const inbound = &mate->upStream;
  Fd			  *const outbound = &mate->downStream;

  Mesg(("MateOpen pid %u upstream %u downstream %u\n",
    pid,upstream->fd,downstream->fd));

  FdSet(inbound,upstream);
  FdSet(outbound,downstream);
  mate->pid = pid;

  return true;
}

/*
  Forward declaration: MateStart() calls MateStop(), which is defined
  after it.  Arguments are the mate, an error message or nil, and the
  byte count to report.
*/
static bool		MateStop(const Mate *const,
			  const char *const ,const offset);

/*
  The main loop of a mate: allocate a buffer of 'bufbytes' and then
  obey the tokens arriving on the upstream link until a STOP arrives
  or the link fails.

    READ   read a block from FdIn into the buffer, then pass READ on
           with the updated byte count;
    WRITE  write the block last read to FdOut, then pass WRITE on
           with the updated byte count;
    ABORT  stop with an error.

  Every way out of the loop goes through MateStop(), which exits the
  process; the function itself only returns (false) when the buffer
  cannot be allocated.
*/
static bool		MateStart
(
  register Mate		  *const mate,
  const address		  bufbytes
)
{
  register char		  *buffer;
  Token 		  token;
  short 		  status;
  offset		  donebytes;
  bool		   	  received;
  /* readbytes must survive from a READ token to the WRITE that follows. */
  static int 		  readbytes,writtenbytes;

  Mesg(("MateStart mate %#x bufbytes %uB\n",mate,bufbytes));

  buffer = (pointer) mallocpaged((size_t) bufbytes);
  if (buffer == nil(pointer))
  {
    /* The conversion takes an 'offset', and the unit printed is KB. */
    mesg("team: mate %d cannot allocate %"_LU"KB\n",
      mate->pid,(offset) (bufbytes>>(10-TeamGRANULE)));
    return false;
  }

  while ((received = StreamReceive(&mate->upStream,&token,&status,&donebytes))
      && token != TokenSTOP)
    switch (token)
    {
    case TokenREAD:
      /* Take over the input state left by the mate that read before us. */
      FdIn.status = status;

      Mesg(("MateStart reading %uB\n",bufbytes));
      readbytes = FdRead(&FdIn,(pointer) buffer,
        (address) bufbytes,donebytes);
      Mesg(("MateStart read %dB\n",readbytes));

#ifdef POSIX_FADV_DONTNEED
# if 1
#   warning using POSIX_FADV_DONTNEED
      /* donebytes is unsigned, so only the read result needs checking. */
      if (readbytes >= 0)
      {
        Mesg(("MateStart READ advising DONTNEED from %"_LU" for %d\n",
          donebytes,readbytes));
        (void) posix_fadvise(FdIn.fd,
          donebytes,(off_t) readbytes,POSIX_FADV_DONTNEED);
        Mesg(("MateStart READ advising DONTNEED errno %d\n",errno));
        errno = 0;
      }
# endif
#endif

#ifdef POSIX_FADV_WILLNEED
# if 1
#   warning using POSIX_FADV_WILLNEED
      if (readbytes > 0)
      {
        Mesg(("MateStart READ advising WILLNEED from %"_LU" for %d\n",
          donebytes+readbytes,bufbytes));
        (void) posix_fadvise(FdIn.fd,
          donebytes+readbytes,(off_t) bufbytes,POSIX_FADV_WILLNEED);
        Mesg(("MateStart READ advising WILLNEED errno %d\n",errno));
        errno = 0;
      }
# endif
#endif

      /* MateStop() does not return: it exits the process. */
      if (readbytes == 0)
	MateStop(mate,nil(char *),donebytes);
      if (readbytes < 0)
	MateStop(mate,"error on mate read",donebytes);

      donebytes += readbytes;

      if (verbose)
#if (TeamLONGLONG)
	mesg("%"_LU"MB read   \r",donebytes>>(20-TeamGRANULE));
#else
	mesg("%"_LU"KB read   \r",donebytes>>(10-TeamGRANULE));
#endif

      if (!StreamSend(&mate->downStream,TokenREAD,FdIn.status,donebytes))
	MateStop(mate,"mate cannot send READ",donebytes);

      break;

    case TokenWRITE:
      /* Take over the output state left by the mate that wrote before us. */
      FdOut.status = status;

      Mesg(("MateStart writing %dB\n",readbytes));
      writtenbytes = FdWrite(&FdOut,(pointer) buffer,
        (address) readbytes,donebytes);
      Mesg(("MateStart written %dB\n",writtenbytes));

#ifdef POSIX_FADV_DONTNEED
# if 1
#   warning using POSIX_FADV_DONTNEED
      if (writtenbytes > 0)
      {
        Mesg(("MateStart WRITE advising DONTNEED from %"_LU" for %d\n",
            donebytes,writtenbytes));
        (void) posix_fadvise(FdOut.fd,
          donebytes,(off_t) writtenbytes,POSIX_FADV_DONTNEED);
        Mesg(("MateStart WRITE advising DONTNEED errno %d\n",errno));
        errno = 0;
      }
#  endif
#endif

      if (writtenbytes == 0)
	MateStop(mate,"eof on mate write",donebytes);
      if (writtenbytes < 0)
	MateStop(mate,"error on mate write",donebytes);

      donebytes += writtenbytes;

      if (verbose)
#if (TeamLONGLONG)
	mesg("%"_LU"MB written\r",donebytes>>(20-TeamGRANULE));
#else
	mesg("%"_LU"KB written\r",donebytes>>(10-TeamGRANULE));
#endif

      if (!StreamSend(&mate->downStream,TokenWRITE,FdOut.status,donebytes))
	MateStop(mate,"mate cannot send WRITE",donebytes);

      break;

    case TokenABORT:
      MateStop(mate,"mate was aborted",0L);
      break;

    default:
      MateStop(mate,"impossible token on ring",donebytes);
    }

  /* freepaged((char *) buffer); */

  MateStop(mate,(received) ? nil(char *) : "error on upstream receive",0L);

  /*NOTREACHED*/
  return true;
}

static bool		MateStop
(
  register const Mate	  *const mate,
  const char		  *const errormsg,
  const offset		  donebytes
)
{
  Mesg(("MateStop mate %#x\n",mate));

  if (donebytes != 0)
  {
    if (report)
    {
      const offset	 elapsed = time((time_t *) 0) - origin;
      const unsigned
	hours = elapsed/3600,
	minutes = elapsed/60 - hours*60,
        seconds = elapsed%60;

#if (TeamLONGLONG)
      mesg("%"_LU"MB, %02u:%02u:%02u (%"_LU" KB/s)\r",
	   donebytes>>(20-TeamGRANULE),
           hours,minutes,seconds,
	   ((donebytes+(1L<<(9-TeamGRANULE)))>>(10-TeamGRANULE))/elapsed
      );
#else
      mesg("%"_LU"KB, %02u:%02u:%02u (%"_LU" KB/s)\r",
	   donebytes>>(10-TeamGRANULE),
           hours,minutes,seconds,
	   ((donebytes+(1L<<(9-TeamGRANULE)))>>(10-TeamGRANULE))/elapsed
      );
#endif
    }

    if (verbose || report)
      mesg("%s","\n");
  }

  if (errormsg != nil(char *))
  {
    mesg("team: mate pid %u: %s\n",mate->pid,errormsg);
    (void) StreamSend(&mate->downStream,TokenABORT,FdERROR,0L);
    exit(1);
    /*NOTREACHED*/
  }

  if (!StreamSend(&mate->downStream,TokenSTOP,FdEOF,0L))
  {
    exit(1);
    /*NOTREACHED*/
  }

  exit(0);
  /*NOTREACHED*/
}

static bool		MateClose
(
  register Mate		  *const mate
)
{
  return FdClose(&mate->upStream) && FdClose(&mate->downStream);
}

/*
  A team is made up of a ring of mates; each mate copies a block from its
  input to its output, and is driven by tokens sent to it by the
  previous mate on a pipe.
*/

/* The set of mates managed by the parent process. */
mode(struct,Team)
{
  Mate			  *mates;	/* array allocated by TeamOpen(), freed by TeamClose() */
  short unsigned	  count;	/* number of entries in 'mates' */
  short unsigned	  active;	/* mates forked and not yet accounted for */
};

/*
  Prepare 'team' for 'nominalcount' mates: allocate a zeroed array of
  that many Mate entries and record the count, with no mate active
  yet.  The count is recorded even when the allocation fails.
  Returns true when the array could be allocated.
*/
static bool		TeamOpen
(
  Team			  *const team,
  const short unsigned	  nominalcount
)
{
  Mesg(("TeamOpen nominalcount %u\n",nominalcount));

  team->active	  = 0;
  team->mates	  = (Mate *) calloc(sizeof (Mate),nominalcount);
  team->count	  = nominalcount;

  if (team->mates == nil(Mate *))
    return false;

  return true;
}

/*
  Fork the mates of 'team', link them into a ring of pipes and set
  the ring going.

  'bufbytes' is the buffer size each mate allocates; 'ibytes' and
  'obytes' are the input and output volume sizes given to FdIn and
  FdOut, which are opened on descriptors 0 and 1.

  Returns false when a pipe cannot be created, a fork fails or one of
  the two initial tokens cannot be sent; true otherwise.  In each
  child the call does not come back: MateStart() and MateStop() end
  the process.
*/
static bool		TeamStart
(
  register Team		  *const team,
  const address		  bufbytes,
  const offset		  ibytes,
  const offset		  obytes
)
{
  /*
    When generating each mate, we pass it an upstream link that
    is the downstream of the previous mate, and create a new
    downstream link that will be the next upstream.

    At each turn we obviously close the old downstream once it
    has been passed to the forked mate.

    A special case are the first and last mates; the upstream of
    the first mate shall be the downstream of the last.  This
    goes against the grain of our main logic, where the
    upstream is expected to already exist and the downstream
    must be created.

    This means that the last and first mates are created in a
    special way.  When creating the first mate we shall create
    its upstream link as well as its downstream, and we shall
    save that in a special variable, last_downstream.  This we
    shall use as the downstream of the last mate.

    We shall also keep it open in the team manager (parent
    process) because we shall use it to do the initial send of
    the read and write tokens that will circulate in the relay
    ring, activating the mates.

    Of course because of this each mate will inherit this link
    as well as its upstream and downstream, but shall graciously
    close it.
  */

  Fd 		    last_downstream;
  Fd 		    this_upstream;
  Fd 		    this_downstream;
  Fd 		    next_upstream;

  Mesg(("TeamStart team %#x count %u bufbytes %uB\n",
    team,team->count,bufbytes));

#ifdef O_SYNC
  /* This makes performance much worse, but just in case :-> */
# if 0
#   warning using O_SYNC
    (void) fcntl(0,F_SETFL,O_SYNC);
    (void) fcntl(1,F_SETFL,O_SYNC);
# endif
#endif

#ifdef POSIX_FADV_SEQUENTIAL
# if 1
#   warning using POSIX_FADV_SEQUENTIAL

    /*
      Alleluiah! Alleluiah! We can advise the OS of which buffering
      strategy to use, which is marvellous for this program, as it does
      strictly sequential transput, and if the OS assumes by default it is
      going to do LIFO style accesses, catastrophe happens.

      However note that according to the 'man' page, 'POSIX_FADV_SEQUENTIAL'
      only affects read-ahead, not free-behind, so we need to do that with
      'POSIX_FADV_DONTNEED' later.
    */

    (void) posix_fadvise(0,(off_t) 0,(off_t) ibytes,POSIX_FADV_SEQUENTIAL);
    (void) posix_fadvise(1,(off_t) 0,(off_t) obytes,POSIX_FADV_SEQUENTIAL);

    errno = 0;
# endif
#endif

#ifdef O_STREAMING
# if 1
#   warning using O_STREAMING

    (void) fcntl(0,F_SETFL,O_STREAMING);
    (void) fcntl(1,F_SETFL,O_STREAMING);

    errno = 0;
# endif
#endif

#ifdef O_DIRECT
# if 0
#   warning using O_DIRECT

    (void) fcntl(0,F_SETFL,O_DIRECT);
    (void) fcntl(1,F_SETFL,O_DIRECT);

    errno = 0;
# endif
#endif

  (void) FdOpen(&FdIn,0,ibytes);
  (void) FdOpen(&FdOut,1,obytes);

  /* team->active doubles as the index of the mate being created. */
  for (team->active = 0; team->active < team->count; team->active++)
  {
    register Mate		*const mate = team->mates+team->active;

    if (team->active == 0)
    {
      /* First mate: create the link that closes the ring, and its own. */
      if (!StreamPipe(&last_downstream,&this_upstream))
      {
	perror("cannot open first link");
	return false;
      }

      if (!StreamPipe(&this_downstream,&next_upstream))
      {
	perror("cannot  open link");
	return false;
      }
    }
    else if (team->active < (team->count-1))
    {
      /* Middle mates: only a new downstream link is needed. */
      if (!StreamPipe(&this_downstream,&next_upstream))
      {
	perror("cannot  open link");
	return  false;
      }
    }
    else /*if (team->active == team->count-1)*/
    {
      /*
	Last mate: its downstream is the link saved from the first
	mate; the parent keeps a dup() of it for the initial tokens.
      */
      FdSet(&this_downstream,&last_downstream);
      if (!FdCopy(&last_downstream,&this_downstream))
	perror("team: cannot copy last downstream");
    }

    Mesg(("TeamStart going to fork for mate %#x\n",mate));

    {
      register int 		pid;
      pid = fork();

      if (pid < 0)
      {
	perror("team: forking a mate");
	return false;
      }
      else if (pid > 0)
      {
	/* Parent: drop this mate's links, the next mate reads next_upstream. */
	Mesg(("TeamStart forked mate %#x as pid %u\n",mate,pid));
	mate->pid = pid;

	if (!FdClose(&this_upstream))
	  perror("cannot close this upstream link");

	if (!FdClose(&this_downstream))
	  perror("cannot close this downstream link");

	FdSet(&this_upstream,&next_upstream);
      }
      else if (pid == 0)
      {
	/* Child: become the mate; MateStart() ends in MateStop(), which exits. */
	pid = getpid();

	if (!FdClose(&last_downstream))
	  perror("cannot close inherited first link");

	if (!MateOpen(mate,pid,&this_upstream,&this_downstream))
	  MateStop(mate,"cannot open mate",0L);

	if (!MateStart(mate,bufbytes))
	  MateStop(mate,"cannot start mate",0L);

	if (!MateClose(mate))
	  perror("cannot close mate");

	/*NOTREACHED*/
      }
    }
  }

  /* Inject the two tokens that will circulate round the ring. */
  if (!StreamSend(&last_downstream,TokenREAD,FdOPEN,0L))
  {
    perror("cannot send first READ token");
    return false;
  }

  if (!StreamSend(&last_downstream,TokenWRITE,FdOPEN,0L))
  {
    perror("cannot send first WRITE token");
    return false;
  }

  if (!FdClose(&last_downstream))
    perror("cannot close first link");

  return true;
}

/*
  Reap the mates of 'team' until none is left active.

  Each mate reaped has its pid entry set to -1 and 'active' is
  decremented.  Returns false as soon as a mate ends with a non-zero
  wait status while others are still active; returns true when all
  have been reaped, or when wait() itself fails.
*/
static bool		TeamWait
(
  register Team		    *const team
)
{
  while (team->active != 0)
  {
    int 		    status;
    const int 		    matepid = wait(&status);
    register short unsigned slot;

    if (matepid < 0)
    {
      mesg("team: no mates, believed %u left\n",team->active);
      return true;
    }

    /* Find the entry of the mate that just ended, if it is one of ours. */
    for (slot = 0;
	 slot < team->count && team->mates[slot].pid != matepid;
	 slot++)
      continue;

    if (slot < team->count)
      team->mates[slot].pid = -1;

    team->active--;

    if (status != 0 && team->active != 0)
      return false;
  }

  return true;
}

static bool		TeamStop
(
  register Team		  *const team
)
{
  register short unsigned mateno;

  Mesg(("TeamStop team %#x\n",team));

  for (mateno = 0; mateno < team->count; mateno++)
  {
    register const Mate	  *const mate = team->mates+mateno;
    if (mate->pid >= 0)
    {
      /*kill(mate->pid,SIGKILL);*/
      --team->active;
    }
  }

  return team->active == 0;
}

static bool		TeamClose
(
  register Team		  *const team
)
{
  for ((void) team->count; team->count != 0; --team->count)
    continue;

  free(team->mates);

  team->mates = 0;

  return true;
}

/*
  Print the command syntax and the built-in defaults on stderr, then
  exit with status 1.  Volume defaults are shown in MB and the buffer
  default in KB, each converted from its internal value.
*/
static void		usage(void)
{
  fprintf(stderr,
    "syntax: team [-nvr] [-i I[sbkmg]] [-o O[sbkmg]] [N[sbkmg] [P]]\n"
    "  copies standard input to output\n"
    "  -n terminate at end of input volume, without prompt\n"
    "  -v gives ongoing report, -r final report\n"
    "  I is input volume size (default %"_LU"MB)\n"
    "  O is output volume size (default %"_LU"MB)\n"
    "  N is buffer size (default %luKB)\n"
    "  P is number of processes (default %u)\n"
    "  (postfix [bBsS] means *512, [kK] means *1KB,\n"
    "   [mM] means *1MB, [gG] means *1GB)\n",
      TeamHVOL_GRANULES>>(20-TeamGRANULE),
      TeamHVOL_GRANULES>>(20-TeamGRANULE),
      /* The label says KB, so convert the byte count before printing. */
      (long unsigned) (TeamDBUF_BYTES>>(10-TeamGRANULE)),
      (unsigned) TeamDTEAMCNT);

  exit(1);
  /*NOTREACHED*/
}

/*
  Convert a size given as decimal digits with an optional one-letter
  multiplier into units of (1 << sr) bytes.

  's' is the text; the multiplier letters are s/S and b/B (*512),
  k/K (*1KB), m/M (*1MB) and g/G (*1GB); anything else, including no
  letter at all, means bytes.  Text that does not start with a digit
  yields 0.  'sr' is the log2 of the unit of the result.

  The digits are accumulated in an 'offset', the type of the result,
  so that large byte counts do not wrap where 'long' is narrower.
*/
static offset		atos
(
  register const char	  *s,
  const unsigned	  sr
)
{
  register offset	  value = 0;
  register unsigned	  sl;

  while (*s >= '0' && *s <= '9')
  {
    value = value*10 + (offset) (*s-'0');
    s++;
  }

  switch (*s)
  {
  case 's': case 'S':
  case 'b': case 'B': sl = 9; break;
  case 'k': case 'K': sl = 10; break;
  case 'm': case 'M': sl = 20; break;
  case 'g': case 'G': sl = 30; break;
  default: sl = 0; break;
  }

  if (sl == sr)
    return value;

  return (sl > sr) ? value << (sl-sr) : value >> (sr-sl);
}

/*
  Entry point: parse the command line, then open, start, wait for and
  close the team.

  Options: -n, -v and -r toggle 'nocontinue', 'verbose' and 'report';
  -i and -o give the input and output volume sizes.  Up to two
  operands follow: the buffer size and the number of processes.  Any
  value out of range prints a diagnostic and the usage text, which
  exits with status 1.

  Returns 0 on success and 1 when the team cannot be set up, started,
  stopped or closed.
*/
extern int		main
(
  int			  argc,
  char			  *(argv[])
)
{
  Team			  team;
  short unsigned	  teamcount;

  address		  bufsize;
  offset		  isize;
  offset		  osize;
  int			  opt;

  if (TeamGRANULE > 10)
  {
    fprintf(stderr,"team: internal error:"
      "granule size %u is bigger than 1024\n",
      1<<TeamGRANULE);

    exit(-1);
  }

  Mesg(("sizeof (offset) %u, sizeof (pointer) %u, sizeof (address) %u\n",
    sizeof (offset),sizeof (pointer),sizeof (address)));

  teamcount = TeamDTEAMCNT;
  bufsize   = TeamDBUF_BYTES;
  isize	    = TeamHVOL_GRANULES;
  osize	    = TeamHVOL_GRANULES;
  optind    = 1;

  while ((opt = getopt(argc,argv,"nvri:o:")) != -1)
    switch (opt)
    {
    case 'n':   nocontinue ^= 1;        break;
    case 'v':	verbose ^= 1;	        break;
    case 'r':	report ^= 1;	        break;

    case 'i':
      isize = atos(optarg,TeamGRANULE);
      if (isize != TeamVOLNOLIMIT
	&& (isize < TeamLVOL_GRANULES || isize > TeamHVOL_GRANULES))
      {
	fprintf(stderr,"team: invalid input volume size %"_LU"\n",isize);
	usage();
      }
      break;

    case 'o':
      osize = atos(optarg,TeamGRANULE);
      if (osize != TeamVOLNOLIMIT
	&& (osize < TeamLVOL_GRANULES || osize > TeamHVOL_GRANULES))
      {
	fprintf(stderr,"team: invalid output volume size %"_LU"\n",osize);
	usage();
      }
      break;

    default:
      usage();
    }

  argc -= optind, argv += optind;

  if (argc != 0)
  {
    bufsize = (address) atos(argv[0],TeamGRANULE);
    if (bufsize < TeamLBUF_BYTES || bufsize > TeamHBUF_BYTES)
    {
      fprintf(stderr,"team: invalid block size %luB\n",
        (long unsigned) bufsize);
      usage();
    }
    --argc, argv++;
  }

  if (argc != 0)
  {
    /*
      Check the range on the int before narrowing it: stored straight
      into a short unsigned, a value such as 65539 would wrap round to
      an acceptable count.
    */
    const int		  requested = atoi(argv[0]);

    if (requested < 2 || requested > TeamHTEAMCNT)
    {
      fprintf(stderr,"team: invalid # of processes %d\n",requested);
      usage();
    }
    teamcount = (short unsigned) requested;
    --argc, argv++;
  }

  if (argc != 0)   usage();

  if (!TeamOpen(&team,teamcount))
  {
    mesg("team: cannot setup the team with %u mates\n",teamcount);
    return 1;
  }

  /* Start of the copy, as far as the final report is concerned. */
  origin = time((time_t *) 0);

  if (!TeamStart(&team,bufsize,isize,osize))
  {
    mesg("team: cannot start the team\n");
    return 1;
  }

  if (!TeamWait(&team))
  {
    mesg("team: stop remaining %u mates\n",team.active);

    if (!TeamStop(&team))
    {
      mesg("team: cannot stop the team\n");
      return 1;
    }
  }

  if (!TeamClose(&team))
  {
    mesg("team: cannot close the team\n");
    return 1;
  }

  return 0;
}
