Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Sep 2009 16:34:27 +0000 (09:34 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Sep 2009 16:34:27 +0000 (09:34 -0700)
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  perf_counter: Fix buffer overflow in perf_copy_attr()

1  2 
kernel/perf_counter.c

diff --combined kernel/perf_counter.c
index e0d91fdf0c3cfbb417c6fd94d38f2c5700942815,a67a1dc3cfa3858c0ca969d1feea2f5605fb331b..e7f60f8e31edc99a57a57fb4fb0c2353c9093341
@@@ -46,18 -46,12 +46,18 @@@ static atomic_t nr_task_counters __read
  
  /*
   * perf counter paranoia level:
 - *  0 - not paranoid
 - *  1 - disallow cpu counters to unpriv
 - *  2 - disallow kernel profiling to unpriv
 + *  -1 - not paranoid at all
 + *   0 - disallow raw tracepoint access for unpriv
 + *   1 - disallow cpu counters for unpriv
 + *   2 - disallow kernel profiling for unpriv
   */
  int sysctl_perf_counter_paranoid __read_mostly = 1;
  
 +static inline bool perf_paranoid_tracepoint_raw(void)
 +{
 +      return sysctl_perf_counter_paranoid > -1;
 +}
 +
  static inline bool perf_paranoid_cpu(void)
  {
        return sysctl_perf_counter_paranoid > 0;
@@@ -475,8 -469,7 +475,8 @@@ static void update_counter_times(struc
        struct perf_counter_context *ctx = counter->ctx;
        u64 run_end;
  
 -      if (counter->state < PERF_COUNTER_STATE_INACTIVE)
 +      if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
 +          counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
                return;
  
        counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
@@@ -525,7 -518,7 +525,7 @@@ static void __perf_counter_disable(voi
         */
        if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
                update_context_time(ctx);
 -              update_counter_times(counter);
 +              update_group_times(counter);
                if (counter == counter->group_leader)
                        group_sched_out(counter, cpuctx, ctx);
                else
@@@ -580,7 -573,7 +580,7 @@@ static void perf_counter_disable(struc
         * in, so we can change the state safely.
         */
        if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
 -              update_counter_times(counter);
 +              update_group_times(counter);
                counter->state = PERF_COUNTER_STATE_OFF;
        }
  
@@@ -857,27 -850,6 +857,27 @@@ retry
        spin_unlock_irq(&ctx->lock);
  }
  
 +/*
 + * Put a counter into inactive state and update time fields.
 + * Enabling the leader of a group effectively enables all
 + * the group members that aren't explicitly disabled, so we
 + * have to update their ->tstamp_enabled also.
 + * Note: this works for group members as well as group leaders
 + * since the non-leader members' sibling_lists will be empty.
 + */
 +static void __perf_counter_mark_enabled(struct perf_counter *counter,
 +                                      struct perf_counter_context *ctx)
 +{
 +      struct perf_counter *sub;
 +
 +      counter->state = PERF_COUNTER_STATE_INACTIVE;
 +      counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
 +      list_for_each_entry(sub, &counter->sibling_list, list_entry)
 +              if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
 +                      sub->tstamp_enabled =
 +                              ctx->time - sub->total_time_enabled;
 +}
 +
  /*
   * Cross CPU call to enable a performance counter
   */
@@@ -905,7 -877,8 +905,7 @@@ static void __perf_counter_enable(void 
  
        if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
                goto unlock;
 -      counter->state = PERF_COUNTER_STATE_INACTIVE;
 -      counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
 +      __perf_counter_mark_enabled(counter, ctx);
  
        /*
         * If the counter is in a group and isn't the group leader,
@@@ -998,9 -971,11 +998,9 @@@ static void perf_counter_enable(struct 
         * Since we have the lock this context can't be scheduled
         * in, so we can change the state safely.
         */
 -      if (counter->state == PERF_COUNTER_STATE_OFF) {
 -              counter->state = PERF_COUNTER_STATE_INACTIVE;
 -              counter->tstamp_enabled =
 -                      ctx->time - counter->total_time_enabled;
 -      }
 +      if (counter->state == PERF_COUNTER_STATE_OFF)
 +              __perf_counter_mark_enabled(counter, ctx);
 +
   out:
        spin_unlock_irq(&ctx->lock);
  }
@@@ -1504,7 -1479,9 +1504,7 @@@ static void perf_counter_enable_on_exec
                counter->attr.enable_on_exec = 0;
                if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
                        continue;
 -              counter->state = PERF_COUNTER_STATE_INACTIVE;
 -              counter->tstamp_enabled =
 -                      ctx->time - counter->total_time_enabled;
 +              __perf_counter_mark_enabled(counter, ctx);
                enabled = 1;
        }
  
@@@ -1698,11 -1675,6 +1698,11 @@@ static void free_counter(struct perf_co
                        atomic_dec(&nr_task_counters);
        }
  
 +      if (counter->output) {
 +              fput(counter->output->filp);
 +              counter->output = NULL;
 +      }
 +
        if (counter->destroy)
                counter->destroy(counter);
  
@@@ -1988,8 -1960,6 +1988,8 @@@ unlock
        return ret;
  }
  
 +int perf_counter_set_output(struct perf_counter *counter, int output_fd);
 +
  static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  {
        struct perf_counter *counter = file->private_data;
        case PERF_COUNTER_IOC_PERIOD:
                return perf_counter_period(counter, (u64 __user *)arg);
  
 +      case PERF_COUNTER_IOC_SET_OUTPUT:
 +              return perf_counter_set_output(counter, arg);
 +
        default:
                return -ENOTTY;
        }
@@@ -2286,11 -2253,6 +2286,11 @@@ static int perf_mmap(struct file *file
  
        WARN_ON_ONCE(counter->ctx->parent_ctx);
        mutex_lock(&counter->mmap_mutex);
 +      if (counter->output) {
 +              ret = -EINVAL;
 +              goto unlock;
 +      }
 +
        if (atomic_inc_not_zero(&counter->mmap_count)) {
                if (nr_pages != counter->data->nr_pages)
                        ret = -EINVAL;
@@@ -2676,7 -2638,6 +2676,7 @@@ static int perf_output_begin(struct per
                             struct perf_counter *counter, unsigned int size,
                             int nmi, int sample)
  {
 +      struct perf_counter *output_counter;
        struct perf_mmap_data *data;
        unsigned int offset, head;
        int have_lost;
                u64                      lost;
        } lost_event;
  
 +      rcu_read_lock();
        /*
         * For inherited counters we send all the output towards the parent.
         */
        if (counter->parent)
                counter = counter->parent;
  
 -      rcu_read_lock();
 +      output_counter = rcu_dereference(counter->output);
 +      if (output_counter)
 +              counter = output_counter;
 +
        data = rcu_dereference(counter->data);
        if (!data)
                goto out;
@@@ -3977,7 -3934,6 +3977,7 @@@ static const struct pmu *tp_perf_counte
         * have these.
         */
        if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
 +                      perf_paranoid_tracepoint_raw() &&
                        !capable(CAP_SYS_ADMIN))
                return ERR_PTR(-EPERM);
  
@@@ -4215,6 -4171,7 +4215,7 @@@ static int perf_copy_attr(struct perf_c
                        if (val)
                                goto err_size;
                }
+               size = sizeof(*attr);
        }
  
        ret = copy_from_user(attr, uattr, size);
@@@ -4246,57 -4203,6 +4247,57 @@@ err_size
        goto out;
  }
  
 +int perf_counter_set_output(struct perf_counter *counter, int output_fd)
 +{
 +      struct perf_counter *output_counter = NULL;
 +      struct file *output_file = NULL;
 +      struct perf_counter *old_output;
 +      int fput_needed = 0;
 +      int ret = -EINVAL;
 +
 +      if (!output_fd)
 +              goto set;
 +
 +      output_file = fget_light(output_fd, &fput_needed);
 +      if (!output_file)
 +              return -EBADF;
 +
 +      if (output_file->f_op != &perf_fops)
 +              goto out;
 +
 +      output_counter = output_file->private_data;
 +
 +      /* Don't chain output fds */
 +      if (output_counter->output)
 +              goto out;
 +
 +      /* Don't set an output fd when we already have an output channel */
 +      if (counter->data)
 +              goto out;
 +
 +      atomic_long_inc(&output_file->f_count);
 +
 +set:
 +      mutex_lock(&counter->mmap_mutex);
 +      old_output = counter->output;
 +      rcu_assign_pointer(counter->output, output_counter);
 +      mutex_unlock(&counter->mmap_mutex);
 +
 +      if (old_output) {
 +              /*
 +               * we need to make sure no existing perf_output_*()
 +               * is still referencing this counter.
 +               */
 +              synchronize_rcu();
 +              fput(old_output->filp);
 +      }
 +
 +      ret = 0;
 +out:
 +      fput_light(output_file, fput_needed);
 +      return ret;
 +}
 +
  /**
   * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
   *
@@@ -4316,15 -4222,15 +4317,15 @@@ SYSCALL_DEFINE5(perf_counter_open
        struct file *group_file = NULL;
        int fput_needed = 0;
        int fput_needed2 = 0;
 -      int ret;
 +      int err;
  
        /* for future expandability... */
 -      if (flags)
 +      if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
                return -EINVAL;
  
 -      ret = perf_copy_attr(attr_uptr, &attr);
 -      if (ret)
 -              return ret;
 +      err = perf_copy_attr(attr_uptr, &attr);
 +      if (err)
 +              return err;
  
        if (!attr.exclude_kernel) {
                if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
         * Look up the group leader (we will attach this counter to it):
         */
        group_leader = NULL;
 -      if (group_fd != -1) {
 -              ret = -EINVAL;
 +      if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
 +              err = -EINVAL;
                group_file = fget_light(group_fd, &fput_needed);
                if (!group_file)
                        goto err_put_context;
  
        counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
                                     NULL, GFP_KERNEL);
 -      ret = PTR_ERR(counter);
 +      err = PTR_ERR(counter);
        if (IS_ERR(counter))
                goto err_put_context;
  
 -      ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
 -      if (ret < 0)
 +      err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
 +      if (err < 0)
                goto err_free_put_context;
  
 -      counter_file = fget_light(ret, &fput_needed2);
 +      counter_file = fget_light(err, &fput_needed2);
        if (!counter_file)
                goto err_free_put_context;
  
 +      if (flags & PERF_FLAG_FD_OUTPUT) {
 +              err = perf_counter_set_output(counter, group_fd);
 +              if (err)
 +                      goto err_fput_free_put_context;
 +      }
 +
        counter->filp = counter_file;
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);
        list_add_tail(&counter->owner_entry, &current->perf_counter_list);
        mutex_unlock(&current->perf_counter_mutex);
  
 +err_fput_free_put_context:
        fput_light(counter_file, fput_needed2);
  
 -out_fput:
 -      fput_light(group_file, fput_needed);
 -
 -      return ret;
 -
  err_free_put_context:
 -      kfree(counter);
 +      if (err < 0)
 +              kfree(counter);
  
  err_put_context:
 -      put_ctx(ctx);
 +      if (err < 0)
 +              put_ctx(ctx);
 +
 +      fput_light(group_file, fput_needed);
  
 -      goto out_fput;
 +      return err;
  }
  
  /*