drm/amdgpu: add command submission workflow tracepoint
author Chunming Zhou <David1.Zhou@amd.com>
Wed, 11 Nov 2015 06:56:00 +0000 (14:56 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 16 Nov 2015 16:05:57 +0000 (11:05 -0500)
OGL needs these tracepoints to investigate performance issues.

Change-Id: I5e58187d061253f7d665dfce8e4e163ba91d3e2b
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c

index bf32096b8eb791921cd8faad3a773227a3167706..2ae73d5232dda6319a5581b057073dba3c1e1a15 100644 (file)
@@ -888,7 +888,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                ttm_eu_fence_buffer_objects(&parser.ticket,
                                &parser.validated,
                                &job->base.s_fence->base);
-
+               trace_amdgpu_cs_ioctl(job);
                mutex_unlock(&job->job_lock);
                amdgpu_cs_parser_fini_late(&parser);
                mutex_unlock(&vm->mutex);
index dcf4a8aca680d885fc6a52414ca2c8e5e3bd0914..67f778f6eedbb041ed12b057fc6dd0732e28a022 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 
 static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
 {
@@ -45,6 +46,7 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
        }
        job = to_amdgpu_job(sched_job);
        mutex_lock(&job->job_lock);
+       trace_amdgpu_sched_run_job(job);
        r = amdgpu_ib_schedule(job->adev,
                               job->num_ibs,
                               job->ibs,
index 26e2d50a6f64daf7e0a2bc327654d8b7c3e20f10..8f9834ab1bd5a6ad2e0908dfeb0aa6ff5206fc31 100644 (file)
@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs,
                      __entry->fences)
 );
 
+TRACE_EVENT(amdgpu_cs_ioctl,
+           TP_PROTO(struct amdgpu_job *job),
+           TP_ARGS(job),
+           TP_STRUCT__entry(
+                            __field(struct amdgpu_device *, adev)
+                            __field(struct amd_sched_job *, sched_job)
+                            __field(struct amdgpu_ib *, ib)
+                            __field(struct fence *, fence)
+                            __field(char *, ring_name)
+                            __field(u32, num_ibs)
+                            ),
+
+           TP_fast_assign(
+                          __entry->adev = job->adev;
+                          __entry->sched_job = &job->base;
+                          __entry->ib = job->ibs;
+                          __entry->fence = &job->base.s_fence->base;
+                          __entry->ring_name = job->ibs[0].ring->name;
+                          __entry->num_ibs = job->num_ibs;
+                          ),
+           TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+                     __entry->adev, __entry->sched_job, __entry->ib,
+                     __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+TRACE_EVENT(amdgpu_sched_run_job,
+           TP_PROTO(struct amdgpu_job *job),
+           TP_ARGS(job),
+           TP_STRUCT__entry(
+                            __field(struct amdgpu_device *, adev)
+                            __field(struct amd_sched_job *, sched_job)
+                            __field(struct amdgpu_ib *, ib)
+                            __field(struct fence *, fence)
+                            __field(char *, ring_name)
+                            __field(u32, num_ibs)
+                            ),
+
+           TP_fast_assign(
+                          __entry->adev = job->adev;
+                          __entry->sched_job = &job->base;
+                          __entry->ib = job->ibs;
+                          __entry->fence = &job->base.s_fence->base;
+                          __entry->ring_name = job->ibs[0].ring->name;
+                          __entry->num_ibs = job->num_ibs;
+                          ),
+           TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+                     __entry->adev, __entry->sched_job, __entry->ib,
+                     __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+
 TRACE_EVENT(amdgpu_vm_grab_id,
            TP_PROTO(unsigned vmid, int ring),
            TP_ARGS(vmid, ring),
index 144f50acc9715c0e2b30b459e68fb7c9f7c543ba..c89dc777768f8cdabe3ce882483c134faa6aefa7 100644 (file)
@@ -16,6 +16,8 @@ TRACE_EVENT(amd_sched_job,
            TP_ARGS(sched_job),
            TP_STRUCT__entry(
                             __field(struct amd_sched_entity *, entity)
+                            __field(struct amd_sched_job *, sched_job)
+                            __field(struct fence *, fence)
                             __field(const char *, name)
                             __field(u32, job_count)
                             __field(int, hw_job_count)
@@ -23,16 +25,32 @@ TRACE_EVENT(amd_sched_job,
 
            TP_fast_assign(
                           __entry->entity = sched_job->s_entity;
+                          __entry->sched_job = sched_job;
+                          __entry->fence = &sched_job->s_fence->base;
                           __entry->name = sched_job->sched->name;
                           __entry->job_count = kfifo_len(
                                   &sched_job->s_entity->job_queue) / sizeof(sched_job);
                           __entry->hw_job_count = atomic_read(
                                   &sched_job->sched->hw_rq_count);
                           ),
-           TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d",
-                     __entry->entity, __entry->name, __entry->job_count,
-                     __entry->hw_job_count)
+           TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d",
+                     __entry->entity, __entry->sched_job, __entry->fence, __entry->name,
+                     __entry->job_count, __entry->hw_job_count)
 );
+
+TRACE_EVENT(amd_sched_process_job,
+           TP_PROTO(struct amd_sched_fence *fence),
+           TP_ARGS(fence),
+           TP_STRUCT__entry(
+                   __field(struct fence *, fence)
+                   ),
+
+           TP_fast_assign(
+                   __entry->fence = &fence->base;
+                   ),
+           TP_printk("fence=%p signaled", __entry->fence)
+);
+
 #endif
 
 /* This part must be outside protection */
index fe5b3c415f188788a435b85dd59cee0144297703..b8925fea577cba84ec2be164a717c5879c0a1e56 100644 (file)
@@ -346,6 +346,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
                list_del_init(&s_fence->list);
                spin_unlock_irqrestore(&sched->fence_list_lock, flags);
        }
+       trace_amd_sched_process_job(s_fence);
        fence_put(&s_fence->base);
        wake_up_interruptible(&sched->wake_up_worker);
 }