folly/Subprocess.h

   1 /*
   2  * Copyright 2017 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * Subprocess library, modeled after Python's subprocess module
  19  * (http://docs.python.org/2/library/subprocess.html)
  20  *
  21  * This library defines one class (Subprocess) which represents a child
  22  * process.  Subprocess has two constructors: one that takes a vector<string>
  23  * and executes the given executable without using the shell, and one
  24  * that takes a string and executes the given command using the shell.
  25  * Subprocess allows you to redirect the child's standard input, standard
  26  * output, and standard error to/from child descriptors in the parent,
  27  * or to create communication pipes between the child and the parent.
  28  *
  29  * The simplest example is a thread-safe [1] version of the system() library
  30  * function:
  31  *    Subprocess(cmd).wait();
  32  * which executes the command using the default shell and waits for it
  33  * to complete, returning the exit status.
  34  *
  35  * A thread-safe [1] version of popen() (type="r", to read from the child):
  36  *    Subprocess proc(cmd, Subprocess::pipeStdout());
  37  *    // read from proc.stdoutFd()
  38  *    proc.wait();
  39  *
  40  * A thread-safe [1] version of popen() (type="w", to write to the child):
  41  *    Subprocess proc(cmd, Subprocess::pipeStdin());
  42  *    // write to proc.stdinFd()
  43  *    proc.wait();
  44  *
  45  * If you want to redirect both stdin and stdout to pipes, you can, but note
  46  * that you're subject to a variety of deadlocks.  You'll want to use
  47  * nonblocking I/O, like the callback version of communicate().
  48  *
  49  * The string or IOBuf-based variants of communicate() are the simplest way
  50  * to communicate with a child via its standard input, standard output, and
  51  * standard error.  They buffer everything in memory, so they are not great
  52  * for large amounts of data (or long-running processes), but they are much
  53  * simpler than the callback version.
  54  *
  55  * == A note on thread-safety ==
  56  *
  57  * [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
  58  * to fork in a way that does not cause grief in multithreaded programs.
  59  *
  60  * Caveat: If your system does not have the atomic pipe2 system call, it is
  61  * not safe to concurrently call Subprocess from different threads.
  62  * Therefore, it is best to have a single thread be responsible for spawning
  63  * subprocesses.
  64  *
  65  * A particular instance of Subprocess is emphatically **not** thread-safe.
  66  * If you need to simultaneously communicate via the pipes, and interact
  67  * with the Subprocess state, your best bet is to:
  68  *  - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
  69  *  - Only interact with the Subprocess from one thread at a time.
  70  *
  71  * The current implementation of communicate() cannot be safely interrupted.
  72  * To do so correctly, one would need to use EventFD, or open a dedicated
  73  * pipe to be messaged from a different thread -- in particular, kill() will
  74  * not do, since a descendant may keep the pipes open indefinitely.
  75  *
  76  * So, once you call communicate(), you must wait for it to return, and not
  77  * touch the pipes from other threads.  closeParentFd() is emphatically
  78  * unsafe to call concurrently, and even sendSignal() is not a good idea.
  79  * You can perhaps give the Subprocess's PID to a different thread before
  80  * starting communicate(), and use that PID to send a signal without
  81  * accessing the Subprocess object.  In that case, you will need a mutex
  82  * that ensures you don't wait() before you sent said signal.  In a
  83  * nutshell, don't do this.
  84  *
  85  * In fact, signals are inherently concurrency-unsafe on Unix: if you signal
  86  * a PID, while another thread is in waitpid(), the signal may fire either
  87  * before or after the process is reaped.  This means that your signal can,
  88  * in pathological circumstances, be delivered to the wrong process (ouch!).
  89  * To avoid this, you should only use non-blocking waits (i.e. poll()), and
  90  * make sure to serialize your signals (i.e. kill()) with the waits --
  91  * either wait & signal from the same thread, or use a mutex.
  92  */
  93
  94 #pragma once
  95
  96 #include <sys/types.h>
  97 #include <signal.h>
  98 #if __APPLE__
  99 #include <sys/wait.h>
 100 #else
 101 #include <wait.h>
 102 #endif
 103
 104 #include <exception>
 105 #include <vector>
 106 #include <string>
 107
 108 #include <boost/container/flat_map.hpp>
 109
 110 #include <folly/Exception.h>
 111 #include <folly/File.h>
 112 #include <folly/FileUtil.h>
 113 #include <folly/Function.h>
 114 #include <folly/MapUtil.h>
 115 #include <folly/Optional.h>
 116 #include <folly/Portability.h>
 117 #include <folly/Range.h>
 118 #include <folly/gen/String.h>
 119 #include <folly/io/IOBufQueue.h>
 120
 121 namespace folly {
 122
 123 /**
 124  * Class to wrap a process return code.  (Subprocess is forward-declared just below only so that it can be named as a friend.)
 125  */
 126 class Subprocess;
 127 class ProcessReturnCode {
 128   friend class Subprocess;  // Subprocess needs the private int constructor.
 129  public:
 130   enum State {
 131     // Subprocess starts in the constructor, so this state designates only
 132     // default-initialized or moved-out ProcessReturnCodes.
 133     NOT_STARTED,
 134     RUNNING,
 135     EXITED,
 136     KILLED
 137   };
 138
 139   // Default-initialized for convenience (to the NOT_STARTED sentinel).
 140   // Subprocess::returnCode() will never produce this value.
 141   ProcessReturnCode() : ProcessReturnCode(RV_NOT_STARTED) {}
 142
 143   // Trivially copyable: the only data member is an int.
 144   ProcessReturnCode(const ProcessReturnCode& p) = default;
 145   ProcessReturnCode& operator=(const ProcessReturnCode& p) = default;
 146   // Non-default move: In order for Subprocess to be movable, the "moved
 147   // out" state must not be "running", or ~Subprocess() will abort.
 148   ProcessReturnCode(ProcessReturnCode&& p) noexcept;
 149   ProcessReturnCode& operator=(ProcessReturnCode&& p) noexcept;
 150
 151   /**
 152    * Process state.  One of:
 153    * NOT_STARTED: process hasn't been started successfully
 154    * RUNNING: process is currently running
 155    * EXITED: process exited (successfully or not)
 156    * KILLED: process was killed by a signal.
 157    */
 158   State state() const;
 159
 160   /**
 161    * Helper wrappers around state(); each one compares it to a single State.
 162    */
 163   bool notStarted() const { return state() == NOT_STARTED; }
 164   bool running() const { return state() == RUNNING; }
 165   bool exited() const { return state() == EXITED; }
 166   bool killed() const { return state() == KILLED; }
 167
 168   /**
 169    * Exit status.  Only valid if state() == EXITED; throws otherwise.
 170    */
 171   int exitStatus() const;
 172
 173   /**
 174    * Signal that caused the process's termination.  Only valid if
 175    * state() == KILLED; throws otherwise.
 176    */
 177   int killSignal() const;
 178
 179   /**
 180    * Was a core file generated?  Only valid if state() == KILLED; throws
 181    * otherwise.
 182    */
 183   bool coreDumped() const;
 184
 185   /**
 186    * String representation; one of
 187    * "not started"
 188    * "running"
 189    * "exited with status <status>"
 190    * "killed by signal <signal>"
 191    * "killed by signal <signal> (core dumped)"
 192    */
 193   std::string str() const;
 194
 195   /**
 196    * Helper function to enforce a precondition based on this.
 197    * Throws std::logic_error if in an unexpected state.
 198    */
 199   void enforce(State state) const;
 200  private:
 201   explicit ProcessReturnCode(int rv) : rawStatus_(rv) { }  // Private: reachable only from this class and its friend Subprocess.
 202   static constexpr int RV_NOT_STARTED = -2;  // Sentinel stored by the default constructor.
 203   static constexpr int RV_RUNNING = -1;  // Sentinel; presumably stored while the child is running -- confirm in Subprocess.cpp.
 204
 205   int rawStatus_;  // One of the RV_* sentinels or, presumably, a wait()-style status word -- confirm in Subprocess.cpp.
 206 };
 207
 208 /**
 209  * Base exception thrown by the Subprocess methods.  Adds nothing to std::exception; it exists so callers can catch all Subprocess errors at once.
 210  */
 211 class SubprocessError : public std::exception {};
 212
 213 /**
 214  * Exception thrown by *Checked methods of Subprocess.  Carries the offending ProcessReturnCode and a message buffer returned by what().
 215  */
 216 class CalledProcessError : public SubprocessError {
 217  public:
 218   explicit CalledProcessError(ProcessReturnCode rc);
 219   ~CalledProcessError() noexcept = default;  // noexcept replaces the deprecated throw() specification.
 220   const char* what() const noexcept override { return what_.c_str(); }  // Pointer stays valid only while this exception object lives.
 221   ProcessReturnCode returnCode() const { return returnCode_; }  // Returned by value.
 222  private:
 223   ProcessReturnCode returnCode_;
 224   std::string what_;  // Backing storage for what().
 225 };
 226
 227 /**
 228  * Exception thrown if the subprocess cannot be started.  Keeps the errno value it was constructed with; see errnoValue().
 229  */
 230 class SubprocessSpawnError : public SubprocessError {
 231  public:
 232   SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
 233   ~SubprocessSpawnError() noexcept = default;  // noexcept replaces the deprecated throw() specification.
 234   const char* what() const noexcept override { return what_.c_str(); }  // Pointer stays valid only while this exception object lives.
 235   int errnoValue() const { return errnoValue_; }
 236
 237  private:
 238   int errnoValue_;
 239   std::string what_;  // Backing storage for what().
 240 };
 241
 242 /**
 243  * Subprocess.
 244  */
 245 class Subprocess {
 246  public:
 247   static const int CLOSE = -1;  // fd action: close the descriptor in the child.
 248   static const int PIPE = -2;  // fd action: shortcut -- PIPE_IN for stdin, PIPE_OUT for stdout / stderr.
 249   static const int PIPE_IN = -3;  // fd action: pipe *to* the child (the parent writes to it).
 250   static const int PIPE_OUT = -4;  // fd action: pipe *from* the child (the parent reads from it).
 251
 252   /**
 253    * See Subprocess::Options::dangerousPostForkPreExecCallback() for usage.
 254    * Every derived class should include the following warning:
 255    *
 256    * DANGER: This class runs after fork in the child process. Be fast, the
 257    * parent thread is waiting, but remember that other parent threads are
 258    * running and may mutate your state.  Avoid mutating any data belonging to
 259    * the parent.  Avoid interacting with non-POD data that originated in the
 260    * parent.  Avoid any libraries that may internally reference non-POD data.
 261    * Especially beware parent mutexes -- for example, glog's LOG() uses one.
 262    */
 263   struct DangerousPostForkPreExecCallback {
 264     virtual ~DangerousPostForkPreExecCallback() {}  // Virtual: the interface is meant to be derived from.
 265     // This must return 0 on success, or an `errno` error code.
 266     virtual int operator()() = 0;
 267   };
 268
 269   /**
 270    * Class representing various options: file descriptor behavior, and
 271    * whether to use $PATH for searching for the executable.
 272    *
 273    * By default, we don't use $PATH, file descriptors are closed if
 274    * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited
 275    * otherwise.  Every setter returns *this, so calls can be chained.
 276    */
 277   class Options {
 278     friend class Subprocess;  // Subprocess reads the private settings below.
 279    public:
 280     Options() {}  // Hand-written, presumably to dodge compiler bugs, e.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
 281
 282     /**
 283      * Change action for file descriptor fd.
 284      *
 285      * "action" may be another file descriptor number (dup2()ed before the
 286      * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
 287      *
 288      * CLOSE: close the file descriptor in the child
 289      * PIPE_IN: open a pipe *to* the child (the child reads from it)
 290      * PIPE_OUT: open a pipe *from* the child (the child writes to it)
 291      *
 292      * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
 293      * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
 294      * other file descriptors.
 295      */
 296     Options& fd(int fd, int action);
 297
 298     /**
 299      * Shortcut to change the action for standard input.
 300      */
 301     Options& stdinFd(int action) { return fd(STDIN_FILENO, action); }
 302
 303     /**
 304      * Shortcut to change the action for standard output.
 305      */
 306     Options& stdoutFd(int action) { return fd(STDOUT_FILENO, action); }
 307
 308     /**
 309      * Shortcut to change the action for standard error.
 310      * Note that stderrFd(1) will redirect the standard error to the same
 311      * file descriptor as standard output; the equivalent of bash's "2>&1"
 312      */
 313     Options& stderrFd(int action) { return fd(STDERR_FILENO, action); }
 314
 315     Options& pipeStdin() { return fd(STDIN_FILENO, PIPE_IN); }  // Parent writes to the child's stdin.
 316     Options& pipeStdout() { return fd(STDOUT_FILENO, PIPE_OUT); }  // Parent reads the child's stdout.
 317     Options& pipeStderr() { return fd(STDERR_FILENO, PIPE_OUT); }  // Parent reads the child's stderr.
 318
 319     /**
 320      * Close all other fds (other than standard input, output, error,
 321      * and file descriptors explicitly specified with fd()).
 322      *
 323      * This is potentially slow; it's generally a better idea to
 324      * set the close-on-exec flag on all file descriptors that shouldn't
 325      * be inherited by the child.
 326      *
 327      * Even with this option set, standard input, output, and error are
 328      * not closed; use stdinFd(CLOSE), stdoutFd(CLOSE), stderrFd(CLOSE) if you
 329      * desire this.
 330      */
 331     Options& closeOtherFds() { closeOtherFds_ = true; return *this; }
 332
 333     /**
 334      * Use the search path ($PATH) when searching for the executable.
 335      */
 336     Options& usePath() { usePath_ = true; return *this; }
 337
 338     /**
 339      * Change the child's working directory, after the vfork.
 340      */
 341     Options& chdir(const std::string& dir) { childDir_ = dir; return *this; }
 342
 343 #if __linux__
 344     /**
 345      * Child will receive a signal when the parent exits.
 346      */
 347     Options& parentDeathSignal(int sig) {
 348       parentDeathSignal_ = sig;
 349       return *this;
 350     }
 351 #endif
 352
 353     /**
 354      * Child will be made a process group leader when it starts. Upside: one
 355      * can reliably kill all its non-daemonizing descendants.  Downside: the
 356      * child will not receive Ctrl-C etc during interactive use.
 357      */
 358     Options& processGroupLeader() {
 359       processGroupLeader_ = true;
 360       return *this;
 361     }
 362
 363     /**
 364      * *** READ THIS WHOLE DOCBLOCK BEFORE USING ***
 365      *
 366      * Run this callback in the child after the fork, just before the
 367      * exec(), and after the child's state has been completely set up:
 368      *  - signal handlers have been reset to default handling and unblocked
 369      *  - the working directory was set
 370      *  - closed any file descriptors specified via Options()
 371      *  - set child process flags (see code)
 372      *
 373      * This is EXTREMELY DANGEROUS. For example, this innocuous-looking code
 374      * can cause a fraction of your Subprocess launches to hang forever:
 375      *
 376      *   LOG(INFO) << "Hello from the child";
 377      *
 378      * The reason is that glog has an internal mutex. If your fork() happens
 379      * when the parent has the mutex locked, the child will wait forever.
 380      *
 381      * == GUIDELINES ==
 382      *
 383      * - Be quick -- the parent thread is blocked until you exit.
 384      * - Remember that other parent threads are running, and may mutate your
 385      *   state.
 386      * - Avoid mutating any data belonging to the parent.
 387      * - Avoid interacting with non-POD data that came from the parent.
 388      * - Avoid any libraries that may internally reference non-POD state.
 389      * - Especially beware parent mutexes, e.g. LOG() uses a global mutex.
 390      * - Avoid invoking the parent's destructors (you can accidentally
 391      *   delete files, terminate network connections, etc).
 392      * - Read http://ewontfix.com/7/
 393      */
 394     Options& dangerousPostForkPreExecCallback(
 395         DangerousPostForkPreExecCallback* cob) {
 396       dangerousPostForkPreExecCallback_ = cob;  // Only the raw pointer is stored; nothing here takes ownership.
 397       return *this;
 398     }
 399
 400 #if __linux__
 401     /**
 402      * This is an experimental feature, it is best you don't use it at this
 403      * point of time.
 404      * Although folly would support cloning with custom flags in some form, this
 405      * API might change in the near future. So use the following assuming it is
 406      * experimental. (Apr 11, 2017)
 407      *
 408      * This unlocks Subprocess to support clone flags, many of them need
 409      * CAP_SYS_ADMIN permissions. It might also require you to go through the
 410      * implementation to understand what happens before, between and after the
 411      * fork-and-exec.
 412      *
 413      * `man 2 clone` would be a starting point for knowing about the available
 414      * flags.
 415      */
 416     using clone_flags_t = uint64_t;
 417     Options& useCloneWithFlags(clone_flags_t cloneFlags) noexcept {
 418       cloneFlags_ = cloneFlags;
 419       return *this;
 420     }
 421 #endif
 422
 423    private:
 424     typedef boost::container::flat_map<int, int> FdMap;  // fd -> action; presumably keyed by the child's fd -- confirm in fd().
 425     FdMap fdActions_;
 426     bool closeOtherFds_{false};
 427     bool usePath_{false};
 428     std::string childDir_;  // "" keeps the parent's working directory
 429 #if __linux__
 430     int parentDeathSignal_{0};  // 0 is the default; presumably means "no signal" -- confirm in Subprocess.cpp.
 431 #endif
 432     bool processGroupLeader_{false};
 433     DangerousPostForkPreExecCallback*
 434       dangerousPostForkPreExecCallback_{nullptr};  // Non-owning; nullptr when no callback was set.
 435 #if __linux__
 436     // none means `vfork()` instead of a custom `clone()`
 437     // Optional<> is used because value of '0' means do clone without any flags.
 438     Optional<clone_flags_t> cloneFlags_;
 439 #endif
 440   };
 441
 442   static Options pipeStdin() { return Options().stdinFd(PIPE); }  // Fresh Options with only stdin piped (PIPE shortcut on fd 0).
 443   static Options pipeStdout() { return Options().stdoutFd(PIPE); }  // Fresh Options with only stdout piped (PIPE shortcut on fd 1).
 444   static Options pipeStderr() { return Options().stderrFd(PIPE); }  // Fresh Options with only stderr piped (PIPE shortcut on fd 2).
 445
 446   // Non-copyable, but movable
 447   Subprocess(const Subprocess&) = delete;
 448   Subprocess& operator=(const Subprocess&) = delete;
 449   Subprocess(Subprocess&&) = default;
 450   Subprocess& operator=(Subprocess&&) = default;
 451
 452   /**
 453    * Create an uninitialized subprocess.
 454    *
 455    * In this state it can only be destroyed, or assigned to using the move
 456    * assignment operator.
 457    */
 458   Subprocess();
 459
 460   /**
 461    * Create a subprocess from the given arguments.  argv[0] must be listed.
 462    * If not-null, executable must be the actual executable
 463    * being used (otherwise it's the same as argv[0]).
 464    *
 465    * If env is not-null, it must contain name=value strings to be used
 466    * as the child's environment; otherwise, we inherit the environment
 467    * from the parent.  env must be null if options.usePath is set.
 468    */
 469   explicit Subprocess(
 470       const std::vector<std::string>& argv,
 471       const Options& options = Options(),
 472       const char* executable = nullptr,
 473       const std::vector<std::string>* env = nullptr);
 474   ~Subprocess();
 475
 476   /**
 477    * Create a subprocess run as a shell command (as shell -c 'command')
 478    *
 479    * The shell to use is taken from the environment variable $SHELL,
 480    * or /bin/sh if $SHELL is unset.
 481    */
 482   FOLLY_DEPRECATED("Prefer not running in a shell or use `shellify`.")
 483   explicit Subprocess(
 484       const std::string& cmd,
 485       const Options& options = Options(),
 486       const std::vector<std::string>* env = nullptr);
 487
 488   ////
 489   //// The methods below only manipulate the process state, and do not
 490   //// affect its communication pipes.
 491   ////
 492
 493   /**
 494    * Return the child's pid, or -1 if the child wasn't successfully spawned
 495    * or has already been wait()ed upon.
 496    */
 497   pid_t pid() const;
 498
 499   /**
 500    * Return the stored ProcessReturnCode: the child's status (as per wait())
 501    * if the process has already been waited on, raw value -1 (RUNNING) if it is
 502    * still running, or -2 (NOT_STARTED) if it hasn't been successfully started.
 503    * NOTE that this does not call waitpid() or Subprocess::poll(), but simply
 504    * returns, by value, the status stored in the Subprocess object.
 505    */
 506   ProcessReturnCode returnCode() const { return returnCode_; }
 507
 508   /**
 509    * Poll the child's status and return it. Return the exit status if the
 510    * subprocess had quit, or RUNNING otherwise.  Throws an std::logic_error
 511    * if called on a Subprocess whose status is no longer RUNNING.  No other
 512    * exceptions are possible.  Aborts on egregious violations of contract,
 513    * e.g. if you wait for the underlying process without going through this
 514    * Subprocess instance.
 515    */
 516   ProcessReturnCode poll();
 517
 518   /**
 519    * Poll the child's status.  If the process is still running, return false.
 520    * Otherwise, return true if the process exited with status 0 (success),
 521    * or throw CalledProcessError if the process exited with a non-zero status.
 522    */
 523   bool pollChecked();
 524
 525   /**
 526    * Wait for the process to terminate and return its status.  Like poll(),
 527    * the only exception this can throw is std::logic_error if you call this
 528    * on a Subprocess whose status is not RUNNING.  Aborts on egregious
 529    * violations of contract, like an out-of-band waitpid(p.pid(), 0, 0).
 530    */
 531   ProcessReturnCode wait();
 532
 533   /**
 534    * Wait for the process to terminate, throw if unsuccessful.
 535    */
 536   void waitChecked();
 537
 538   /**
 539    * Send a signal to the child.  Shortcuts for the commonly used Unix
 540    * signals are below; both simply forward to sendSignal().
 541    */
 542   void sendSignal(int signal);
 543   void terminate() { sendSignal(SIGTERM); }  // Sends SIGTERM.
 544   void kill() { sendSignal(SIGKILL); }  // Sends SIGKILL.
 545
 546   ////
 547   //// The methods below only affect the process's communication pipes, but
 548   //// not its return code or state (they do not poll() or wait()).
 549   ////
 550
 551   /**
 552    * Communicate with the child until all pipes to/from the child are closed.
 553    *
 554    * The input buffer is written to the process' stdin pipe, and data is read
 555    * from the stdout and stderr pipes.  Non-blocking I/O is performed on all
 556    * pipes simultaneously to avoid deadlocks.
 557    *
 558    * The stdin pipe will be closed after the full input buffer has been written.
 559    * An error will be thrown if a non-empty input buffer is supplied but stdin
 560    * was not configured as a pipe.
 561    *
 562    * Returns a pair of buffers containing the data read from stdout and stderr.
 563    * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
 564    * for the respective buffer.
 565    *
 566    * Note that communicate() and communicateIOBuf() both return when all
 567    * pipes to/from the child are closed; the child might stay alive after
 568    * that, so you must still wait().
 569    *
 570    * communicateIOBuf() uses IOBufQueue for buffering (which has the
 571    * advantage that it won't try to allocate all data at once), but it does
 572    * store the subprocess's entire output in memory before returning.
 573    *
 574    * communicate() uses strings for simplicity.
 575    */
 576   std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
 577       IOBufQueue input = IOBufQueue());
 578
 579   std::pair<std::string, std::string> communicate(
 580       StringPiece input = StringPiece());
 581
 582   /**
 583    * Communicate with the child until all pipes to/from the child are closed.
 584    *
 585    * == Semantics ==
 586    *
 587    * readCallback(pfd, cfd) will be called whenever there's data available
 588    * on any pipe *from* the child (PIPE_OUT).  pfd is the file descriptor
 589    * in the parent (that you use to read from); cfd is the file descriptor
 590    * in the child (used for identifying the stream; 1 = child's standard
 591    * output, 2 = child's standard error, etc)
 592    *
 593    * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is
 594    * writable (PIPE_IN).  pfd is the file descriptor in the parent (that you
 595    * use to write to); cfd is the file descriptor in the child (used for
 596    * identifying the stream; 0 = child's standard input, etc)
 597    *
 598    * The read and write callbacks must read from / write to pfd and return
 599    * false during normal operation.  Return true to tell communicate() to
 600    * close the pipe.  For readCallback, this might send SIGPIPE to the
 601    * child, or make its writes fail with EPIPE, so you should generally
 602    * avoid returning true unless you've reached end-of-file.
 603    *
 604    * communicate() returns when all pipes to/from the child are closed; the
 605    * child might stay alive after that, so you must still wait().
 606    * Conversely, the child may quit long before its pipes are closed, since
 607    * its descendants can keep them alive forever.
 608    *
 609    * Most users won't need to use this callback version; the simpler version
 610    * of communicate (which buffers data in memory) will probably work fine.
 611    *
 612    * == Things you must get correct ==
 613    *
 614    * 1) You MUST consume all data passed to readCallback (or return true to
 615    * close the pipe).  Similarly, you MUST write to a writable pipe (or
 616    * return true to close the pipe).  To do otherwise is an error that can
 617    * result in a deadlock.  You must do this even for pipes you are not
 618    * interested in.
 619    *
 620    * 2) pfd is nonblocking, so be prepared for read() / write() to return -1
 621    * and set errno to EAGAIN (in which case you should return false).  Use
 622    * readNoInt() from FileUtil.h to handle interrupted reads for you.
 623    *
 624    * 3) Your callbacks MUST NOT call any of the Subprocess methods that
 625    * manipulate the pipe FDs.  Check the docblocks, but, for example,
 626    * neither closeParentFd (return true instead) nor takeOwnershipOfPipes
 627    * are safe.  Stick to reading/writing from pfd, as appropriate.
 628    *
 629    * == Good to know ==
 630    *
 631    * 1) See ReadLinesCallback for an easy way to consume the child's output
 632    * streams line-by-line (or tokenized by another delimiter).
 633    *
 634    * 2) "Wait until the descendants close the pipes" is usually the behavior
 635    * you want, since the descendants may have something to say even if the
 636    * immediate child is dead.  If you need to be able to force-close all
 637    * parent FDs, communicate() will NOT work for you.  Do it your own way by
 638    * using takeOwnershipOfPipes().
 639    *
 640    * Why not? You can return "true" from your callbacks to sever active
 641    * pipes, but inactive ones can remain open indefinitely.  It is
 642    * impossible to safely close inactive pipes while another thread is
 643    * blocked in communicate().  This is BY DESIGN.  Racing communicate()'s
 644    * read/write callbacks can result in wrong I/O and data corruption.  This
 645    * class would need internal synchronization and timeouts, a poor and
 646    * expensive implementation choice, in order to make closeParentFd()
 647    * thread-safe.
 648    */
 649   using FdCallback = folly::Function<bool(int, int)>;
 650   void communicate(FdCallback readCallback, FdCallback writeCallback);
 651
 652   /**
 653    * A readCallback for Subprocess::communicate() that helps you consume
 654    * lines (or other delimited pieces) from your subprocess's file
 655    * descriptors.  Use the readLinesCallback() helper to get template
 656    * deduction.  For example:
 657    *
 658    *   subprocess.communicate(
 659    *     Subprocess::readLinesCallback(
 660    *       [](int fd, folly::StringPiece s) {
 661    *         std::cout << fd << " said: " << s;
 662    *         return false;  // Keep reading from the child
 663    *       }
 664    *     ),
 665    *     [](int pfd, int cfd){ return true; }  // Don't write to the child
 666    *   );
 667    *
 668    * If a file line exceeds maxLineLength, your callback will get some
 669    * initial chunks of maxLineLength with no trailing delimiters.  The final
 670    * chunk of a line is delimiter-terminated iff the delimiter was present
 671    * in the input.  In particular, the last line in a file always lacks a
 672    * delimiter -- so if a file ends on a delimiter, the final line is empty.
 673    *
 674    * Like a regular communicate() callback, your fdLineCb() normally returns
 675    * false.  It may return true to tell Subprocess to close the underlying
 676    * file descriptor.  The child process may then receive SIGPIPE or get
 677    * EPIPE errors on writes.
 678    */
 679   template <class Callback>
 680   class ReadLinesCallback {
 681    private:
 682     // Binds an FD to the client-provided FD+line callback
 683     struct StreamSplitterCallback {
 684       StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) { }
 685       // The return value semantics are inverted vs StreamSplitter
 686       bool operator()(StringPiece s) { return !cb_(fd_, s); }
 687       Callback& cb_;
 688       int fd_;
 689     };
 690     typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
 691    public:
 692     explicit ReadLinesCallback(
 693       Callback&& fdLineCb,
 694       uint64_t maxLineLength = 0,  // No line length limit by default
 695       char delimiter = '\n',
 696       uint64_t bufSize = 1024
 697     ) : fdLineCb_(std::forward<Callback>(fdLineCb)),
 698         maxLineLength_(maxLineLength),
 699         delimiter_(delimiter),
 700         bufSize_(bufSize) {}
 701
 702     bool operator()(int pfd, int cfd) {
 703       // Make a splitter for this cfd if it doesn't already exist
 704       auto it = fdToSplitter_.find(cfd);
 705       auto& splitter = (it != fdToSplitter_.end()) ? it->second
 706         : fdToSplitter_.emplace(cfd, LineSplitter(
 707             delimiter_, StreamSplitterCallback(fdLineCb_, cfd), maxLineLength_
 708           )).first->second;
 709       // Read as much as we can from this FD
 710       char buf[bufSize_];
 711       while (true) {
 712         ssize_t ret = readNoInt(pfd, buf, bufSize_);
 713         if (ret == -1 && errno == EAGAIN) {  // No more data for now
 714           return false;
 715         }
 716         checkUnixError(ret, "read");
 717         if (ret == 0) {  // Reached end-of-file
 718           splitter.flush();  // Ignore return since the file is over anyway
 719           return true;
 720         }
 721         if (!splitter(StringPiece(buf, ret))) {
 722           return true;  // The callback told us to stop
 723         }
 724       }
 725     }
 726
 727    private:
 728     Callback fdLineCb_;
 729     const uint64_t maxLineLength_;
 730     const char delimiter_;
 731     const uint64_t bufSize_;
 732     // We lazily make splitters for all cfds that get used.
 733     std::unordered_map<int, LineSplitter> fdToSplitter_;
 734   };
 735
 736   // Helper to enable template deduction.  NOTE(review): ReadLinesCallback's constructor takes Callback&&, so an lvalue fdLineCb does not appear to bind -- confirm.
 737   template <class Callback>
 738   static auto readLinesCallback(
 739       Callback&& fdLineCb,
 740       uint64_t maxLineLength = 0, // No line length limit by default
 741       char delimiter = '\n',
 742       uint64_t bufSize = 1024)
 743       -> ReadLinesCallback<typename std::decay<Callback>::type> {  // decay: the result stores the callback by value.
 744     return ReadLinesCallback<typename std::decay<Callback>::type>(
 745         std::forward<Callback>(fdLineCb), maxLineLength, delimiter, bufSize);
 746   }
 747
 748   /**
 749    * For use from communicate() callbacks: turns notifications (callbacks)
 750    * for the pipe identified by childFd on or off.  All pipes start out
 751    * enabled.  Useful for "chatty" communication -- e.g. keep write
 752    * callbacks disabled until you receive the expected message.
 753    *
 754    * Disabling a pipe does not free you from the requirement to consume all
 755    * incoming data.  Failing to do so will easily create deadlock bugs.
 756    *
 757    * Throws if the childFd is not known.
 758    */
 759   void enableNotifications(int childFd, bool enabled);
 760
 761   /**
 762    * Returns whether notifications are currently enabled for the pipe
 763    * to/from the child identified by childFd.  Throws if it is not known.
 764    */
 765   bool notificationsEnabled(int childFd) const;
 766
 767   ////
 768   //// The following methods are meant for the cases when communicate() is
 769   //// not suitable.  You should not need them when you call communicate(),
 770   //// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or
 771   //// takeOwnershipOfPipes() from a communicate() callback.
 772   ////
 773
 774   /**
 775    * Closes the parent's file descriptor for the pipe that is childFd in
 776    * the child.  DO NOT USE from communicate() callbacks; return true instead.
 777    */
 778   void closeParentFd(int childFd);
 779
 780   /**
 781    * Makes every pipe from / to the child non-blocking.  There is no need to
 782    * call this before communicate(), which does it for you.
 783    */
 784   void setAllNonBlocking();
 785
 786   /**
 787    * Parent's fd for the pipe that is childFd in the child; throws if it isn't
 788    * a pipe (PIPE_IN / PIPE_OUT).  Don't close() it; use closeParentFd, above.
 789    */
 790   int parentFd(int childFd) const {
 791     const size_t pipeIdx = findByChildFd(childFd);
 792     return pipes_[pipeIdx].pipe.fd();
 793   }
 794   int stdinFd() const { return parentFd(0); }  // Parent fd for child's fd 0
 795   int stdoutFd() const { return parentFd(1); }  // Parent fd for child's fd 1
 796   int stderrFd() const { return parentFd(2); }  // Parent fd for child's fd 2
 797
 798   /**
 799    * takeOwnershipOfPipes(): the child's pipes are logically separate from
 800    * the process metadata (they may even be kept alive by the child's
 801    * descendants).  This call lets you manage the pipes' lifetime
 802    * separately from the lifetime of the child process.
 803    *
 804    * After this call, the Subprocess instance will have no knowledge of
 805    * these pipes, and the caller assumes responsibility for managing their
 806    * lifetimes.  Pro-tip: prefer to explicitly close() the pipes, since
 807    * folly::File would otherwise silently suppress I/O errors.
 808    *
 809    * No, you may NOT call this from a communicate() callback.
 810    */
 811   struct ChildPipe {
 812     ChildPipe(int fd, folly::File&& ppe) : childFd(fd), pipe(std::move(ppe)) {}
 813     int childFd;  // This pipe's FD number in the child
 814     folly::File pipe;  // Owns the parent FD
 815   };
 816   std::vector<ChildPipe> takeOwnershipOfPipes();
 817
 818  private:
 819   static const int RV_RUNNING = ProcessReturnCode::RV_RUNNING;  // alias
 820   static const int RV_NOT_STARTED = ProcessReturnCode::RV_NOT_STARTED;  // alias
 821
 822   // spawn() creates the pipe over which errors from the child are read,
 823   // then hands the bulk of the work to spawnInternal().  After
 824   // spawnInternal() returns, spawn() reads that error pipe to find out
 825   // whether the child encountered any errors.
 826   void spawn(
 827       std::unique_ptr<const char*[]> argv,
 828       const char* executable,
 829       const Options& options,
 830       const std::vector<std::string>* env);
 831   void spawnInternal(
 832       std::unique_ptr<const char*[]> argv,
 833       const char* executable,
 834       Options& options,
 835       const std::vector<std::string>* env,
 836       int errFd);
 837
 838   // Steps performed inside the child process.
 839   // Careful: this runs after vfork(), so tread lightly.
 840   // Returns 0 on success, or an errno value on failure.
 841   int prepareChild(const Options& options,
 842                    const sigset_t* sigmask,
 843                    const char* childDir) const;
 844   int runChild(const char* executable, char** argv, char** env,
 845                const Options& options) const;
 846
 847   /**
 848    * Reads the error pipe; throws SubprocessSpawnError if the child failed
 849    * before calling exec().  (pfd: presumably the pipe's parent end.)
 850    */
 851   void readChildErrorPipe(int pfd, const char* executable);
 852
 853   // Index of childFd's entry in pipes_; throws std::invalid_argument if absent.
 854   size_t findByChildFd(const int childFd) const;
 855
 856   pid_t pid_{-1};  // Initially -1
 857   ProcessReturnCode returnCode_{RV_NOT_STARTED};  // Initially "not started"
 858
 859   /**
 860    * One pipe connecting this process to the child process (or to one of
 861    * its descendants).  Ways to interact with these pipes: communicate();
 862    * parentFd() and related methods; or detaching them from the Subprocess
 863    * instance entirely via takeOwnershipOfPipes().
 864    */
 865   struct Pipe : private boost::totally_ordered<Pipe> {
 866     folly::File pipe; // Our end of the pipe; the File wrapper auto-closes it.
 867     int childFd = -1; // Identifies the pipe: its FD number in the child.
 868     int direction = PIPE_IN; // Either PIPE_IN or PIPE_OUT
 869     bool enabled = true; // Whether communicate() notifications are enabled
 870     // Ordering and equality below compare childFd and nothing else.
 871     bool operator<(const Pipe& other) const {
 872       return childFd < other.childFd;
 873     }
 874     bool operator==(const Pipe& other) const {
 875       return childFd == other.childFd;
 876     }
 877   };
 878
 879   // Filled in at process start according to fdActions; empty again after
 880   // takeOwnershipOfPipes().  Kept sorted by childFd.  Once populated,
 881   // elements may be erased but never inserted.
 882   //
 883   // The number of pipes between parent and child is assumed to be small,
 884   // so a vector is fine here, even though erasing from it is linear.
 885   std::vector<Pipe> pipes_;
 886 };
 887
 888 }  // namespace folly