folly/Subprocess.h

   1 /*
   2  * Copyright 2015 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * Subprocess library, modeled after Python's subprocess module
  19  * (http://docs.python.org/2/library/subprocess.html)
  20  *
  21  * This library defines one class (Subprocess) which represents a child
  22  * process.  Subprocess has two constructors: one that takes a vector<string>
  23  * and executes the given executable without using the shell, and one
  24  * that takes a string and executes the given command using the shell.
  25  * Subprocess allows you to redirect the child's standard input, standard
  26  * output, and standard error to/from child descriptors in the parent,
  27  * or to create communication pipes between the child and the parent.
  28  *
  29  * The simplest example is a thread-safe [1] version of the system() library
  30  * function:
  31  *    Subprocess(cmd).wait();
  32  * which executes the command using the default shell and waits for it
  33  * to complete, returning the exit status.
  34  *
  35  * A thread-safe [1] version of popen() (type="r", to read from the child):
  36  *    Subprocess proc(cmd, Subprocess::pipeStdout());
  37  *    // read from proc.stdout()
  38  *    proc.wait();
  39  *
  40  * A thread-safe [1] version of popen() (type="w", to write to the child):
  41  *    Subprocess proc(cmd, Subprocess::pipeStdin());
  42  *    // write to proc.stdin()
  43  *    proc.wait();
  44  *
  45  * If you want to redirect both stdin and stdout to pipes, you can, but note
  46  * that you're subject to a variety of deadlocks.  You'll want to use
  47  * nonblocking I/O, like the callback version of communicate().
  48  *
  49  * The string or IOBuf-based variants of communicate() are the simplest way
  50  * to communicate with a child via its standard input, standard output, and
  51  * standard error.  They buffer everything in memory, so they are not great
  52  * for large amounts of data (or long-running processes), but they are much
  53  * simpler than the callback version.
  54  *
  55  * == A note on thread-safety ==
  56  *
  57  * [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
  58  * to fork in a way that does not cause grief in multithreaded programs.
  59  *
  60  * Caveat: If your system does not have the atomic pipe2 system call, it is
  61  * not safe to concurrently call Subprocess from different threads.
  62  * Therefore, it is best to have a single thread be responsible for spawning
  63  * subprocesses.
  64  *
  65  * A particular instance of Subprocess is emphatically **not** thread-safe.
  66  * If you need to simultaneously communicate via the pipes, and interact
  67  * with the Subprocess state, your best bet is to:
  68  *  - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
  69  *  - Only interact with the Subprocess from one thread at a time.
  70  *
  71  * The current implementation of communicate() cannot be safely interrupted.
  72  * To do so correctly, one would need to use EventFD, or open a dedicated
  73  * pipe to be messaged from a different thread -- in particular, kill() will
  74  * not do, since a descendant may keep the pipes open indefinitely.
  75  *
  76  * So, once you call communicate(), you must wait for it to return, and not
  77  * touch the pipes from other threads.  closeParentFd() is emphatically
  78  * unsafe to call concurrently, and even sendSignal() is not a good idea.
  79  * You can perhaps give the Subprocess's PID to a different thread before
  80  * starting communicate(), and use that PID to send a signal without
  81  * accessing the Subprocess object.  In that case, you will need a mutex
  82  * that ensures you don't wait() before you sent said signal.  In a
  83  * nutshell, don't do this.
  84  *
  85  * In fact, signals are inherently concurrency-unsafe on Unix: if you signal
  86  * a PID, while another thread is in waitpid(), the signal may fire either
  87  * before or after the process is reaped.  This means that your signal can,
  88  * in pathological circumstances, be delivered to the wrong process (ouch!).
  89  * To avoid this, you should only use non-blocking waits (i.e. poll()), and
  90  * make sure to serialize your signals (i.e. kill()) with the waits --
  91  * either wait & signal from the same thread, or use a mutex.
  92  */
  93 #ifndef FOLLY_SUBPROCESS_H_
  94 #define FOLLY_SUBPROCESS_H_
  95
  96 #include <sys/types.h>
  97 #include <signal.h>
  98 #if __APPLE__
  99 #include <sys/wait.h>
 100 #else
 101 #include <wait.h>
 102 #endif
 103 #include <exception>
 104 #include <string>
 105 #include <system_error>
 106 #include <type_traits>
 107 #include <vector>
 108 #include <boost/container/flat_map.hpp>
 109 #include <boost/operators.hpp>
 110 #include <boost/noncopyable.hpp>
 111
 112 #include <folly/File.h>
 113 #include <folly/FileUtil.h>
 114 #include <folly/gen/String.h>
 115 #include <folly/io/IOBufQueue.h>
 116 #include <folly/MapUtil.h>
 117 #include <folly/Portability.h>
 118 #include <folly/Range.h>
 119
 120 namespace folly {
 121
 122 /**
 123  * Class to wrap a process return code.
 124  */
 125 class Subprocess;
 126 class ProcessReturnCode {
 127   friend class Subprocess;  // uses the private ctor and the RV_* constants
 128  public:
 129   enum State {
 130     NOT_STARTED,
 131     RUNNING,
 132     EXITED,
 133     KILLED
 134   };
 135
 136   /**
 137    * Process state.  One of:
 138    * NOT_STARTED: process hasn't been started successfully
 139    * RUNNING: process is currently running
 140    * EXITED: process exited (successfully or not)
 141    * KILLED: process was killed by a signal.
 142    */
 143   State state() const;
 144
 145   /**
 146    * Convenience predicates: each compares state() with one State value.
 147    */
 148   bool notStarted() const { return state() == NOT_STARTED; }
 149   bool running() const { return state() == RUNNING; }
 150   bool exited() const { return state() == EXITED; }
 151   bool killed() const { return state() == KILLED; }
 152
 153   /**
 154    * Exit status.  Only valid if state() == EXITED; throws otherwise.
 155    */
 156   int exitStatus() const;
 157
 158   /**
 159    * Signal that caused the process's termination.  Only valid if
 160    * state() == KILLED; throws otherwise.
 161    */
 162   int killSignal() const;
 163
 164   /**
 165    * Was a core file generated?  Only valid if state() == KILLED; throws
 166    * otherwise.
 167    */
 168   bool coreDumped() const;
 169
 170   /**
 171    * String representation; one of
 172    * "not started"
 173    * "running"
 174    * "exited with status <status>"
 175    * "killed by signal <signal>"
 176    * "killed by signal <signal> (core dumped)"
 177    */
 178   std::string str() const;
 179
 180   /**
 181    * Helper function to enforce a precondition based on this.
 182    * Throws std::logic_error if in an unexpected state.
 183    */
 184   void enforce(State state) const;
 185  private:
 186   explicit ProcessReturnCode(int rv) : rawStatus_(rv) { }  // rv: raw status
 187   static constexpr int RV_NOT_STARTED = -2;  // presumably: never started
 188   static constexpr int RV_RUNNING = -1;  // presumably: started, not reaped yet
 189
 190   int rawStatus_;  // an RV_* value or, presumably, a wait()-style status
 191 };
 192
 193 /**
 194  * Base of CalledProcessError and SubprocessSpawnError, thrown by Subprocess.
 195  */
 196 class SubprocessError : public std::exception {};  // adds nothing of its own
 197
 198 /**
 199  * Exception thrown by *Checked methods of Subprocess; carries a return code.
 200  */
 201 class CalledProcessError : public SubprocessError {
 202  public:
 203   explicit CalledProcessError(ProcessReturnCode rc);
 204   ~CalledProcessError() noexcept { }  // noexcept replaces deprecated throw()
 205   const char* what() const noexcept FOLLY_OVERRIDE { return what_.c_str(); }
 206   ProcessReturnCode returnCode() const { return returnCode_; }
 207  private:
 208   ProcessReturnCode returnCode_;  // presumably the rc given to the ctor
 209   std::string what_;  // message handed out by what()
 210 };
 211
 212 /**
 213  * Exception thrown if the subprocess cannot be started.
 214  */
 215 class SubprocessSpawnError : public SubprocessError {
 216  public:
 217   SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
 218   ~SubprocessSpawnError() noexcept {}  // noexcept replaces deprecated throw()
 219   const char* what() const noexcept FOLLY_OVERRIDE { return what_.c_str(); }
 220   int errnoValue() const { return errnoValue_; }
 221
 222  private:
 223   int errnoValue_;  // presumably the errnoValue given to the ctor
 224   std::string what_;  // message handed out by what()
 225 };
 226
 227 /**
 228  * Subprocess.
 229  */
 230 class Subprocess : private boost::noncopyable {
 231  public:
 232   static const int CLOSE = -1;
 233   static const int PIPE = -2;
 234   static const int PIPE_IN = -3;
 235   static const int PIPE_OUT = -4;
 236
 237   /**
 238    * Class representing various options: file descriptor behavior, and
 239    * whether to use $PATH for searching for the executable, etc.
 240    *
 241    * By default, we don't use $PATH, file descriptors are closed if
 242    * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited
 243    * otherwise.
 244    */
 245   class Options : private boost::orable<Options> {
 246     friend class Subprocess;
 247    public:
 248     Options() {}  // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
 249
 250     /**
 251      * Change action for file descriptor fd.
 252      *
 253      * "action" may be another file descriptor number (dup2()ed before the
 254      * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
 255      *
 256      * CLOSE: close the file descriptor in the child
 257      * PIPE_IN: open a pipe *to* the child (the child reads, e.g. its stdin)
 258      * PIPE_OUT: open a pipe *from* the child (the child writes, e.g. stdout)
 259      *
 260      * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
 261      * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
 262      * other file descriptors.
 263      */
 264     Options& fd(int fd, int action);
 265
 266     /**
 267      * Shortcut to change the action for standard input.
 268      */
 269     Options& stdin(int action) { return fd(STDIN_FILENO, action); }
 270
 271     /**
 272      * Shortcut to change the action for standard output.
 273      */
 274     Options& stdout(int action) { return fd(STDOUT_FILENO, action); }
 275
 276     /**
 277      * Shortcut to change the action for standard error.
 278      * Note that stderr(1) will redirect the standard error to the same
 279      * file descriptor as standard output; the equivalent of bash's "2>&1"
 280      */
 281     Options& stderr(int action) { return fd(STDERR_FILENO, action); }
 282
 283     Options& pipeStdin() { return fd(STDIN_FILENO, PIPE_IN); }
 284     Options& pipeStdout() { return fd(STDOUT_FILENO, PIPE_OUT); }
 285     Options& pipeStderr() { return fd(STDERR_FILENO, PIPE_OUT); }
 286
 287     /**
 288      * Close all other fds (other than standard input, output, error,
 289      * and file descriptors explicitly specified with fd()).
 290      *
 291      * This is potentially slow; it's generally a better idea to
 292      * set the close-on-exec flag on all file descriptors that shouldn't
 293      * be inherited by the child.
 294      *
 295      * Even with this option set, standard input, output, and error are
 296      * not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you
 297      * desire this.
 298      */
 299     Options& closeOtherFds() { closeOtherFds_ = true; return *this; }
 300
 301     /**
 302      * Use the search path ($PATH) when searching for the executable.
 303      */
 304     Options& usePath() { usePath_ = true; return *this; }
 305
 306     /**
 307      * Change the child's working directory, after the vfork.
 308      */
 309     Options& chdir(const std::string& dir) { childDir_ = dir; return *this; }
 310
 311 #if __linux__
 312     /**
 313      * Child will receive a signal when the parent exits.
 314      */
 315     Options& parentDeathSignal(int sig) {
 316       parentDeathSignal_ = sig;
 317       return *this;
 318     }
 319 #endif
 320
 321     /**
 322      * Child will be made a process group leader when it starts. Upside: one
 323      * can reliably kill all its non-daemonizing descendants.  Downside: the
 324      * child will not receive Ctrl-C etc during interactive use.
 325      */
 326     Options& processGroupLeader() {
 327       processGroupLeader_ = true;
 328       return *this;
 329     }
 330
 331     /**
 332      * Combine Options in place (boost::orable adds operator| on top of this).
 333      */
 334     Options& operator|=(const Options& other);
 335
 336    private:
 337     typedef boost::container::flat_map<int, int> FdMap;
 338     FdMap fdActions_;  // presumably child fd -> action, filled by fd()
 339     bool closeOtherFds_{false};  // set by closeOtherFds()
 340     bool usePath_{false};  // set by usePath()
 341     std::string childDir_;  // "" keeps the parent's working directory
 342 #if __linux__
 343     int parentDeathSignal_{0};  // 0 until parentDeathSignal() is called
 344 #endif
 345     bool processGroupLeader_{false};  // set by processGroupLeader()
 346   };
 347
 348   static Options pipeStdin() { return Options().stdin(PIPE); }
 349   static Options pipeStdout() { return Options().stdout(PIPE); }
 350   static Options pipeStderr() { return Options().stderr(PIPE); }
 351
 352   /**
 353    * Create a subprocess from the given arguments.  argv[0] must be listed.
 354    * If not-null, executable must be the actual executable
 355    * being used (otherwise it's the same as argv[0]).
 356    *
 357    * If env is not-null, it must contain name=value strings to be used
 358    * as the child's environment; otherwise, we inherit the environment
 359    * from the parent.  env must be null if options.usePath is set.
 360    */
 361   explicit Subprocess(
 362       const std::vector<std::string>& argv,
 363       const Options& options = Options(),
 364       const char* executable = nullptr,
 365       const std::vector<std::string>* env = nullptr);
 366   ~Subprocess();
 367
 368   /**
 369    * Create a subprocess run as a shell command (as shell -c 'command')
 370    *
 371    * The shell to use is taken from the environment variable $SHELL,
 372    * or /bin/sh if $SHELL is unset.
 373    */
 374   explicit Subprocess(
 375       const std::string& cmd,
 376       const Options& options = Options(),
 377       const std::vector<std::string>* env = nullptr);
 378
 379   ////
 380   //// The methods below only manipulate the process state, and do not
 381   //// affect its communication pipes.
 382   ////
 383
 384   /**
 385    * Return the child's pid, or -1 if the child wasn't successfully spawned
 386    * or has already been wait()ed upon.
 387    */
 388   pid_t pid() const;
 389
 390   /**
 391    * Return the child's status (as per wait()) if the process has already
 392    * been waited on, -1 if the process is still running, or -2 if the
 393    * process hasn't been successfully started.  NOTE that this does not call
 394    * waitpid() or Subprocess::poll(), but simply returns the status stored
 395    * in the Subprocess object.
 396    */
 397   ProcessReturnCode returnCode() const { return returnCode_; }
 398
 399   /**
 400    * Poll the child's status and return it, return -1 if the process
 401    * is still running.  NOTE that it is illegal to call poll again after
 402    * poll indicated that the process has terminated, or to call poll on a
 403    * process that hasn't been successfully started (the constructor threw an
 404    * exception).
 405    */
 406   ProcessReturnCode poll();
 407
 408   /**
 409    * Poll the child's status.  If the process is still running, return false.
 410    * Otherwise, return true if the process exited with status 0 (success),
 411    * or throw CalledProcessError if the process exited with a non-zero status.
 412    */
 413   bool pollChecked();
 414
 415   /**
 416    * Wait for the process to terminate and return its status.
 417    * Similarly to poll, it is illegal to call wait after the process
 418    * has already been reaped or if the process has not successfully started.
 419    */
 420   ProcessReturnCode wait();
 421
 422   /**
 423    * Wait for the process to terminate, throw if unsuccessful.
 424    */
 425   void waitChecked();
 426
 427   /**
 428    * Send a signal to the child.  Shortcuts for the commonly used Unix
 429    * signals are below.
 430    */
 431   void sendSignal(int signal);
 432   void terminate() { sendSignal(SIGTERM); }
 433   void kill() { sendSignal(SIGKILL); }
 434
 435   ////
 436   //// The methods below only affect the process's communication pipes, but
 437   //// not its return code or state (they do not poll() or wait()).
 438   ////
 439
 440   /**
 441    * Communicate with the child until all pipes to/from the child are closed.
 442    *
 443    * The input buffer is written to the process' stdin pipe, and data is read
 444    * from the stdout and stderr pipes.  Non-blocking I/O is performed on all
 445    * pipes simultaneously to avoid deadlocks.
 446    *
 447    * The stdin pipe will be closed after the full input buffer has been written.
 448    * An error will be thrown if a non-empty input buffer is supplied but stdin
 449    * was not configured as a pipe.
 450    *
 451    * Returns a pair of buffers containing the data read from stdout and stderr.
 452    * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
 453    * for the respective buffer.
 454    *
 455    * Note that communicate() and communicateIOBuf() both return when all
 456    * pipes to/from the child are closed; the child might stay alive after
 457    * that, so you must still wait().
 458    *
 459    * communicateIOBuf() uses IOBufQueue for buffering (which has the
 460    * advantage that it won't try to allocate all data at once), but it does
 461    * store the subprocess's entire output in memory before returning.
 462    *
 463    * communicate() uses strings for simplicity.
 464    */
 465   std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
 466       IOBufQueue input = IOBufQueue());
 467
 468   std::pair<std::string, std::string> communicate(
 469       StringPiece input = StringPiece());
 470
 471   /**
 472    * Communicate with the child until all pipes to/from the child are closed.
 473    *
 474    * == Semantics ==
 475    *
 476    * readCallback(pfd, cfd) will be called whenever there's data available
 477    * on any pipe *from* the child (PIPE_OUT).  pfd is the file descriptor
 478    * in the parent (that you use to read from); cfd is the file descriptor
 479    * in the child (used for identifying the stream; 1 = child's standard
 480    * output, 2 = child's standard error, etc)
 481    *
 482    * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is
 483    * writable (PIPE_IN).  pfd is the file descriptor in the parent (that you
 484    * use to write to); cfd is the file descriptor in the child (used for
 485    * identifying the stream; 0 = child's standard input, etc)
 486    *
 487    * The read and write callbacks must read from / write to pfd and return
 488    * false during normal operation.  Return true to tell communicate() to
 489    * close the pipe.  For readCallback, this might send SIGPIPE to the
 490    * child, or make its writes fail with EPIPE, so you should generally
 491    * avoid returning true unless you've reached end-of-file.
 492    *
 493    * communicate() returns when all pipes to/from the child are closed; the
 494    * child might stay alive after that, so you must still wait().
 495    * Conversely, the child may quit long before its pipes are closed, since
 496    * its descendants can keep them alive forever.
 497    *
 498    * Most users won't need to use this callback version; the simpler version
 499    * of communicate (which buffers data in memory) will probably work fine.
 500    *
 501    * == Things you must get correct ==
 502    *
 503    * 1) You MUST consume all data passed to readCallback (or return true to
 504    * close the pipe).  Similarly, you MUST write to a writable pipe (or
 505    * return true to close the pipe).  To do otherwise is an error that can
 506    * result in a deadlock.  You must do this even for pipes you are not
 507    * interested in.
 508    *
 509    * 2) pfd is nonblocking, so be prepared for read() / write() to return -1
 510    * and set errno to EAGAIN (in which case you should return false).  Use
 511    * readNoInt() from FileUtil.h to handle interrupted reads for you.
 512    *
 513    * 3) Your callbacks MUST NOT call any of the Subprocess methods that
 514    * manipulate the pipe FDs.  Check the docblocks, but, for example,
 515    * neither closeParentFd (return true instead) nor takeOwnershipOfPipes
 516    * are safe.  Stick to reading/writing from pfd, as appropriate.
 517    *
 518    * == Good to know ==
 519    *
 520    * 1) See ReadLinesCallback for an easy way to consume the child's output
 521    * streams line-by-line (or tokenized by another delimiter).
 522    *
 523    * 2) "Wait until the descendants close the pipes" is usually the behavior
 524    * you want, since the descendants may have something to say even if the
 525    * immediate child is dead.  If you need to be able to force-close all
 526    * parent FDs, communicate() will NOT work for you.  Do it your own way by
 527    * using takeOwnershipOfPipes().
 528    *
 529    * Why not? You can return "true" from your callbacks to sever active
 530    * pipes, but inactive ones can remain open indefinitely.  It is
 531    * impossible to safely close inactive pipes while another thread is
 532    * blocked in communicate().  This is BY DESIGN.  Racing communicate()'s
 533    * read/write callbacks can result in wrong I/O and data corruption.  This
 534    * class would need internal synchronization and timeouts, a poor and
 535    * expensive implementation choice, in order to make closeParentFd()
 536    * thread-safe.
 537    */
 538   typedef std::function<bool(int, int)> FdCallback;
 539   void communicate(FdCallback readCallback, FdCallback writeCallback);
 540
 541   /**
 542    * A readCallback for Subprocess::communicate() that helps you consume
 543    * lines (or other delimited pieces) from your subprocess's file
 544    * descriptors.  Use the readLinesCallback() helper to get template
 545    * deduction.  For example:
 546    *
 547    *   auto read_cb = Subprocess::readLinesCallback(
 548    *     [](int fd, folly::StringPiece s) {
 549    *       std::cout << fd << " said: " << s;
 550    *       return false;  // Keep reading from the child
 551    *     }
 552    *   );
 553    *   subprocess.communicate(
 554    *     // ReadLinesCallback contains StreamSplitter contains IOBuf, making
 555    *     // it noncopyable, whereas std::function must be copyable.  So, we
 556    *     // keep the callback in a local, and instead pass a reference.
 557    *     std::ref(read_cb),
 558    *     [](int pfd, int cfd){ return true; }  // Don't write to the child
 559    *   );
 560    *
 561    * If a file line exceeds maxLineLength, your callback will get some
 562    * initial chunks of maxLineLength with no trailing delimiters.  The final
 563    * chunk of a line is delimiter-terminated iff the delimiter was present
 564    * in the input.  In particular, the last line in a file always lacks a
 565    * delimiter -- so if a file ends on a delimiter, the final line is empty.
 566    *
 567    * Like a regular communicate() callback, your fdLineCb() normally returns
 568    * false.  It may return true to tell Subprocess to close the underlying
 569    * file descriptor.  The child process may then receive SIGPIPE or get
 570    * EPIPE errors on writes.
 571    */
 572   template <class Callback>
 573   class ReadLinesCallback {
 574    private:
 575     // Binds an FD to the client-provided FD+line callback
 576     struct StreamSplitterCallback {
 577       StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) { }
 578       // The return value semantics are inverted vs StreamSplitter
 579       bool operator()(StringPiece s) { return !cb_(fd_, s); }
 580       Callback& cb_;
 581       int fd_;
 582     };
 583     typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
 584    public:
 585     explicit ReadLinesCallback(
 586       Callback&& fdLineCb,
 587       uint64_t maxLineLength = 0,  // No line length limit by default
 588       char delimiter = '\n',
 589       uint64_t bufSize = 1024
 590     ) : fdLineCb_(std::move(fdLineCb)),
 591         maxLineLength_(maxLineLength),
 592         delimiter_(delimiter),
 593         bufSize_(bufSize) {}
 594
 595     bool operator()(int pfd, int cfd) {
 596       // Make a splitter for this cfd if it doesn't already exist
 597       auto it = fdToSplitter_.find(cfd);
 598       auto& splitter = (it != fdToSplitter_.end()) ? it->second
 599         : fdToSplitter_.emplace(cfd, LineSplitter(
 600             delimiter_, StreamSplitterCallback(fdLineCb_, cfd), maxLineLength_
 601           )).first->second;
 602       // Read as much as we can from this FD
 603       char buf[bufSize_];
 604       while (true) {
 605         ssize_t ret = readNoInt(pfd, buf, bufSize_);
 606         if (ret == -1 && errno == EAGAIN) {  // No more data for now
 607           return false;
 608         }
 609         if (ret == 0) {  // Reached end-of-file
 610           splitter.flush();  // Ignore return since the file is over anyway
 611           return true;
 612         }
 613         if (!splitter(StringPiece(buf, ret))) {
 614           return true;  // The callback told us to stop
 615         }
 616       }
 617     }
 618
 619    private:
 620     Callback fdLineCb_;
 621     const uint64_t maxLineLength_;
 622     const char delimiter_;
 623     const uint64_t bufSize_;
 624     // We lazily make splitters for all cfds that get used.
 625     std::unordered_map<int, LineSplitter> fdToSplitter_;
 626   };
 627
 628   // Helper to enable template deduction; stores a decayed copy of fdLineCb
 629   template <class Callback>
 630   static ReadLinesCallback<typename std::decay<Callback>::type>
 631   readLinesCallback(Callback&& fdLineCb,  // copied if lvalue, moved if rvalue
 632       uint64_t maxLineLength = 0,  // No line length limit by default
 633       char delimiter = '\n',
 634       uint64_t bufSize = 1024) {
 635     typedef typename std::decay<Callback>::type Cb;  // drop ref/cv-qualifiers
 636     return ReadLinesCallback<Cb>(
 637       Cb(std::forward<Callback>(fdLineCb)), maxLineLength, delimiter, bufSize);
 638   }
 639
 640   /**
 641    * communicate() callbacks can use this to temporarily enable/disable
 642    * notifications (callbacks) for a pipe to/from the child.  By default,
 643    * all are enabled.  Useful for "chatty" communication -- you want to
 644    * disable write callbacks until you receive the expected message.
 645    *
 646    * Disabling a pipe does not free you from the requirement to consume all
 647    * incoming data.  Failing to do so will easily create deadlock bugs.
 648    *
 649    * Throws if the childFd is not known.
 650    */
 651   void enableNotifications(int childFd, bool enabled);
 652
 653   /**
 654    * Are notifications for one pipe to/from child enabled?  Throws if the
 655    * childFd is not known.
 656    */
 657   bool notificationsEnabled(int childFd) const;
 658
 659   ////
 660   //// The following methods are meant for the cases when communicate() is
 661   //// not suitable.  You should not need them when you call communicate(),
 662   //// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or
 663   //// takeOwnershipOfPipes() from a communicate() callback.
 664   ////
 665
 666   /**
 667    * Close the parent file descriptor given a file descriptor in the child.
 668    * DO NOT USE from communicate() callbacks; make them return true instead.
 669    */
 670   void closeParentFd(int childFd);
 671
 672   /**
 673    * Set all pipes from / to child to be non-blocking.  communicate() does
 674    * this for you.
 675    */
 676   void setAllNonBlocking();
 677
 678   /**
 679    * Get parent file descriptor corresponding to the given file descriptor
 680    * in the child.  Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
 681    * Do not close() the returned file descriptor; use closeParentFd, above.
 682    */
 683   int parentFd(int childFd) const {  // findByChildFd() throws if not a pipe
 684     return pipes_[findByChildFd(childFd)].pipe.fd();
 685   }
 686   int stdin() const { return parentFd(STDIN_FILENO); }   // parent end of fd 0
 687   int stdout() const { return parentFd(STDOUT_FILENO); } // parent end of fd 1
 688   int stderr() const { return parentFd(STDERR_FILENO); } // parent end of fd 2
 689
 690   /**
 691    * The child's pipes are logically separate from the process metadata
 692    * (they may even be kept alive by the child's descendants).  This call
 693    * lets you manage the pipes' lifetime separately from the lifetime of the
 694    * child process.
 695    *
 696    * After this call, the Subprocess instance will have no knowledge of
 697    * these pipes, and the caller assumes responsibility for managing their
 698    * lifetimes.  Pro-tip: prefer to explicitly close() the pipes, since
 699    * folly::File would otherwise silently suppress I/O errors.
 700    *
 701    * No, you may NOT call this from a communicate() callback.
 702    */
 703   struct ChildPipe {
 704     int childFd;  // FD number of this pipe in the child; no default value
 705     folly::File pipe;  // Owns the parent FD
 706   };
 707   std::vector<ChildPipe> takeOwnershipOfPipes();
 708
 709  private:
 710   static const int RV_RUNNING = ProcessReturnCode::RV_RUNNING;
 711   static const int RV_NOT_STARTED = ProcessReturnCode::RV_NOT_STARTED;
 712
 713   // spawn() sets up a pipe to read errors from the child,
 714   // then calls spawnInternal() to do the bulk of the work.  Once
 715   // spawnInternal() returns it reads the error pipe to see if the child
 716   // encountered any errors.
 717   void spawn(
 718       std::unique_ptr<const char*[]> argv,
 719       const char* executable,
 720       const Options& options,
 721       const std::vector<std::string>* env);
 722   void spawnInternal(
 723       std::unique_ptr<const char*[]> argv,
 724       const char* executable,
 725       Options& options,
 726       const std::vector<std::string>* env,
 727       int errFd);
 728
 729   // Actions to run in child.
 730   // Note that this runs after vfork(), so tread lightly.
 731   // Returns 0 on success, or an errno value on failure.
 732   int prepareChild(const Options& options,
 733                    const sigset_t* sigmask,
 734                    const char* childDir) const;
 735   int runChild(const char* executable, char** argv, char** env,
 736                const Options& options) const;
 737
 738   /**
 739    * Read from the error pipe, and throw SubprocessSpawnError if the child
 740    * failed before calling exec().
 741    */
 742   void readChildErrorPipe(int pfd, const char* executable);
 743
 744   // Returns an index into pipes_. Throws std::invalid_argument if not found.
 745   size_t findByChildFd(const int childFd) const;
 746
 747
 748   pid_t pid_;
 749   ProcessReturnCode returnCode_;
 750
 751   /**
 752    * Represents a pipe between this process, and the child process (or its
 753    * descendant).  To interact with these pipes, you can use communicate(),
 754    * or use parentFd() and related methods, or separate them from the
 755    * Subprocess instance entirely via takeOwnershipOfPipes().
 756    */
 757   struct Pipe : private boost::totally_ordered<Pipe> {
 758     folly::File pipe; // Our end of the pipe, wrapped in a File to auto-close.
 759     int childFd = -1; // Identifies the pipe: what FD is this in the child?
 760     int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
 761     bool enabled = true; // Are notifications enabled in communicate()?
 762     // Pipes are ordered and compared by childFd alone; other fields are ignored.
 763     bool operator<(const Pipe& other) const {
 764       return childFd < other.childFd;
 765     }
 766     bool operator==(const Pipe& other) const {
 767       return childFd == other.childFd;
 768     }
 769   };
 770
 771   // Populated at process start according to fdActions, empty after
 772   // takeOwnershipOfPipes().  Sorted by childFd.  Can only have elements
 773   // erased, but not inserted, after being populated.
 774   //
 775   // The number of pipes between parent and child is assumed to be small,
 776   // so we're happy with a vector here, even if it means linear erase.
 777   std::vector<Pipe> pipes_;
 778 };
 779
 780 inline Subprocess::Options& Subprocess::Options::operator|=(
 781     const Subprocess::Options& other) {
 782   if (this == &other) return *this;
 783   // Replace
 784   for (auto& p : other.fdActions_) {
 785     fdActions_[p.first] = p.second;
 786   }
 787   closeOtherFds_ |= other.closeOtherFds_;
 788   usePath_ |= other.usePath_;
 789   processGroupLeader_ |= other.processGroupLeader_;
 790   return *this;
 791 }
 792
 793 }  // namespace folly
 794
 795 #endif /* FOLLY_SUBPROCESS_H_ */