3 * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
5 * This program is free software and is provided to you under the terms of the
6 * GNU General Public License version 2 as published by the Free Software
7 * Foundation, and any use by you of this program is subject to the terms
10 * A copy of the licence is included with the program, and can also be obtained
11 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12 * Boston, MA 02110-1301, USA.
18 #include "mali_kbase_debug_job_fault.h"
20 #ifdef CONFIG_DEBUG_FS
/*
 * Wake-up condition used by kbase_job_fault_event_wait(): evaluates to true
 * once event_list holds at least one queued fault event.
 */
22 static bool kbase_is_job_fault_event_pending(struct list_head *event_list)
26 ret = (!list_empty(event_list));
/*
 * Check whether kctx still has a fault event queued on event_list.
 *
 * Tests event_list for emptiness, then walks it comparing the context of
 * each entry's atom (event->katom->kctx) with kctx. It is the wake-up
 * condition of kbase_job_fault_resume_worker().
 *
 * NOTE(review): presumably yields true when the list is empty or holds no
 * entry for kctx, and false as soon as a matching entry is found - confirm
 * against the return statements.
 */
31 static bool kbase_ctx_has_no_event_pending(
32 struct kbase_context *kctx, struct list_head *event_list)
34 struct base_job_fault_event *event;
36 if (list_empty(event_list))
38 list_for_each_entry(event, event_list, head) {
39 if (event->katom->kctx == kctx)
45 /* Wait until a fault event is queued, then copy it into the caller's event.
 *
 * If event_list is empty, sleeps interruptibly on kbdev->job_fault_wq until
 * kbase_is_job_fault_event_pending() holds. The first entry of event_list
 * is then read (it is not unlinked here) and its event_code and katom are
 * copied into event.
 *
 * NOTE(review): debug_job_fault_start() treats a non-zero result as failure;
 * presumably that is the interrupted-wait case - confirm.
 */
46 static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
47 struct list_head *event_list,
48 struct base_job_fault_event *event)
50 struct base_job_fault_event *event_in;
52 if (list_empty(event_list)) {
53 if (wait_event_interruptible(kbdev->job_fault_wq,
54 kbase_is_job_fault_event_pending(event_list)))
58 event_in = list_entry(event_list->next,
59 struct base_job_fault_event, head);
61 event->event_code = event_in->event_code;
62 event->katom = event_in->katom;
67 /* Remove the first event from event_list.
 *
 * Takes the entry at event_list->next and unlinks it with list_del(). The
 * list is not tested for emptiness here; every caller in this file checks
 * list_empty() first. kbdev is not referenced by the statements shown.
 *
 * NOTE(review): kbase_job_fault_resume_event_cleanup() uses the result as
 * the dequeued event, so this presumably returns the unlinked entry.
 */
68 static struct base_job_fault_event *kbase_job_fault_event_dequeue(
69 struct kbase_device *kbdev, struct list_head *event_list)
71 struct base_job_fault_event *event;
73 event = list_entry(event_list->next,
74 struct base_job_fault_event, head);
75 list_del(event_list->next);
81 /* Remove all the following atoms after the failed atom in the same context
82 * Call the postponed bottom half of job done.
83 * Then, this context could be rescheduled.
/*
 * Drain kctx->job_fault_resume_event_list: each queued event is dequeued in
 * turn and kbase_jd_done_worker() is run on the work item of its atom.
 */
85 static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
87 struct list_head *event_list = &kctx->job_fault_resume_event_list;
89 while (!list_empty(event_list)) {
90 struct base_job_fault_event *event;
92 event = kbase_job_fault_event_dequeue(kctx->kbdev,
93 &kctx->job_fault_resume_event_list);
94 kbase_jd_done_worker(&event->katom->work);
99 /* Remove all the failed atoms that belong to different contexts
100 * Resume all the contexts that were suspended due to a failed job
/*
 * Empty kbdev->job_fault_event_list. Each pass dequeues one event and then
 * wakes the waiters on kbdev->job_fault_resume_wq, where
 * kbase_job_fault_resume_worker() sleeps.
 */
102 static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
104 struct list_head *event_list = &kbdev->job_fault_event_list;
106 while (!list_empty(event_list)) {
108 kbase_job_fault_event_dequeue(kbdev, event_list);
109 wake_up(&kbdev->job_fault_resume_wq);
/*
 * Work handler queued by kbase_job_fault_event_post() for a faulted atom.
 *
 * Recovers the base_job_fault_event that embeds the work item, then blocks
 * on kbdev->job_fault_resume_wq until kbase_ctx_has_no_event_pending() holds
 * for the atom's context against kbdev->job_fault_event_list. After that it
 * resets kctx->job_fault_count to 0, runs kbase_jd_done_worker() for the
 * faulted atom and replays the atoms parked on the context's resume list
 * through kbase_job_fault_resume_event_cleanup().
 *
 * NOTE(review): kctx is presumably taken from katom->kctx; that assignment
 * is not among the lines shown.
 */
113 static void kbase_job_fault_resume_worker(struct work_struct *data)
115 struct base_job_fault_event *event = container_of(data,
116 struct base_job_fault_event, job_fault_work);
117 struct kbase_context *kctx;
118 struct kbase_jd_atom *katom;
120 katom = event->katom;
123 dev_info(kctx->kbdev->dev, "Job dumping wait\n");
125 /* When it was waked up, it need to check if queue is empty or the
126 * failed atom belongs to different context. If yes, wake up. Both
127 * of them mean the failed job has been dumped. Please note, it
128 * should never happen that the job_fault_event_list has the two
129 * atoms belong to the same context.
131 wait_event(kctx->kbdev->job_fault_resume_wq,
132 kbase_ctx_has_no_event_pending(kctx,
133 &kctx->kbdev->job_fault_event_list));
135 atomic_set(&kctx->job_fault_count, 0);
136 kbase_jd_done_worker(&katom->work);
138 /* In case the following atoms were scheduled during failed job dump
139 * the job_done_worker was held. We need to rerun it after the dump
142 kbase_job_fault_resume_event_cleanup(kctx);
144 dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
/*
 * Queue the fault event embedded in atom (atom->fault_event) at the tail of
 * event_list, after recording completion_code in it.
 *
 * NOTE(review): kbase_job_fault_event_post() uses the result as the queued
 * event, so this presumably returns &atom->fault_event. Other fields of the
 * event may be initialised on lines not shown here - confirm.
 */
147 static struct base_job_fault_event *kbase_job_fault_event_queue(
148 struct list_head *event_list,
149 struct kbase_jd_atom *atom,
152 struct base_job_fault_event *event;
154 event = &atom->fault_event;
157 event->event_code = completion_code;
159 list_add_tail(&event->head, event_list);
/*
 * Publish a job fault for katom.
 *
 * Queues the atom's fault event on kbdev->job_fault_event_list, wakes the
 * sleepers on kbdev->job_fault_wq (where kbase_job_fault_event_wait()
 * blocks), and schedules kbase_job_fault_resume_worker() for the event on
 * kbdev->job_fault_resume_workq.
 */
165 static void kbase_job_fault_event_post(struct kbase_device *kbdev,
166 struct kbase_jd_atom *katom, u32 completion_code)
168 struct base_job_fault_event *event;
170 event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
171 katom, completion_code);
173 wake_up_interruptible(&kbdev->job_fault_wq);
175 INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
176 queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
178 dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
179 katom->kctx->tgid, katom->kctx->id);
184 * This function processes the job fault:
185 * Get the register copy
186 * Send the failed job dump event
187 * Create a wait queue to wait until the job dump finishes
/*
 * Entry point for job fault handling of katom.
 *
 * - If kctx->job_fault_count is already positive, the atom is appended to
 *   kctx->job_fault_resume_event_list and a "queue:" message is logged.
 * - When kbdev->job_fault_debug is set and completion_code differs from
 *   BASE_JD_EVENT_DONE, a register snapshot is requested; a failed snapshot
 *   is logged with dev_warn(). The fault is posted with
 *   kbase_job_fault_event_post(), job_fault_count is incremented and a
 *   "post:" message is logged.
 *
 * NOTE(review): the return statements are not among the lines shown. The
 * bool result presumably tells the caller whether completion of the atom
 * was held back for dumping, and the failed-snapshot path presumably exits
 * before posting - confirm both.
 */
190 bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
193 struct kbase_context *kctx = katom->kctx;
195 /* Check if dumping is in the process
196 * only one atom of each context can be dumped at the same time
197 * If the atom belongs to different context, it can be dumped
199 if (atomic_read(&kctx->job_fault_count) > 0) {
200 kbase_job_fault_event_queue(
201 &kctx->job_fault_resume_event_list,
202 katom, completion_code);
203 dev_info(kctx->kbdev->dev, "queue:%d\n",
204 kbase_jd_atom_id(kctx, katom));
208 if (kctx->kbdev->job_fault_debug == true) {
210 if (completion_code != BASE_JD_EVENT_DONE) {
212 if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
213 dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
217 kbase_job_fault_event_post(kctx->kbdev, katom,
219 atomic_inc(&kctx->job_fault_count);
220 dev_info(kctx->kbdev->dev, "post:%d\n",
221 kbase_jd_atom_id(kctx, katom));
/*
 * seq_file show callback: prints the next slice of the register dump that
 * belongs to the context of the faulted atom.
 *
 * v is the base_job_fault_event of the current read and event->reg_offset
 * is the running index into kctx->reg_dump. A NULL reg_dump is reported
 * with dev_warn(). When reg_offset is 0 a "<tgid>_<id>" header line is
 * printed first. Entries are then consumed two at a time and printed as
 * "%08x: %08x" lines, for at most 50 pairs per call or until
 * REGISTER_DUMP_TERMINATION_FLAG is found at the current offset.
 *
 * NOTE(review): the values returned on the NULL-dump and termination paths
 * are not among the lines shown.
 */
230 static int debug_job_fault_show(struct seq_file *m, void *v)
232 struct kbase_device *kbdev = m->private;
233 struct base_job_fault_event *event = (struct base_job_fault_event *)v;
234 struct kbase_context *kctx = event->katom->kctx;
237 dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
238 kctx->tgid, kctx->id, event->reg_offset);
240 if (kctx->reg_dump == NULL) {
241 dev_warn(kbdev->dev, "reg dump is NULL");
245 if (kctx->reg_dump[event->reg_offset] ==
246 REGISTER_DUMP_TERMINATION_FLAG) {
247 /* Return the error here to stop the read. And the
248 * following next() will not be called. The stop can
249 * get the real event resource and release it
254 if (event->reg_offset == 0)
255 seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
257 for (i = 0; i < 50; i++) {
258 if (kctx->reg_dump[event->reg_offset] ==
259 REGISTER_DUMP_TERMINATION_FLAG) {
262 seq_printf(m, "%08x: %08x\n",
263 kctx->reg_dump[event->reg_offset],
264 kctx->reg_dump[1+event->reg_offset]);
265 event->reg_offset += 2;
/*
 * seq_file next callback. Logs the event's current reg_offset together with
 * the position *pos.
 *
 * NOTE(review): the statements that advance *pos and produce the returned
 * iterator value are not among the lines shown.
 */
272 static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
274 struct kbase_device *kbdev = m->private;
275 struct base_job_fault_event *event = (struct base_job_fault_event *)v;
277 dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
278 event->reg_offset, (int)*pos);
283 static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
285 struct kbase_device *kbdev = m->private;
286 struct base_job_fault_event *event;
288 dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
290 /* The condition is trick here. It needs make sure the
291 * fault hasn't happened and the dumping hasn't been started,
292 * or the dumping has finished
295 event = kmalloc(sizeof(*event), GFP_KERNEL);
296 event->reg_offset = 0;
297 if (kbase_job_fault_event_wait(kbdev,
298 &kbdev->job_fault_event_list, event)) {
303 /* The cache flush workaround is called in bottom half of
304 * job done but we delayed it. Now we should clean cache
305 * earlier. Then the GPU memory dump should be correct.
307 if (event->katom->need_cache_flush_cores_retained) {
308 kbase_gpu_cacheclean(kbdev, event->katom);
309 event->katom->need_cache_flush_cores_retained = 0;
/*
 * seq_file stop callback.
 *
 * When kbdev->job_fault_event_list is not empty, its first event is removed
 * and the waiters on kbdev->job_fault_resume_wq are woken, which lets
 * kbase_job_fault_resume_worker() re-evaluate its wait condition.
 *
 * NOTE(review): the conditions that select between the "stage 1" and
 * "stage 2" paths, and any handling of v, are not among the lines shown.
 */
318 static void debug_job_fault_stop(struct seq_file *m, void *v)
320 struct kbase_device *kbdev = m->private;
322 /* here we wake up the kbase_jd_done_worker after stop, it needs
323 * get the memory dump before the register dump in debug daemon,
324 * otherwise, the memory dump may be incorrect.
329 dev_info(kbdev->dev, "debug job fault seq stop stage 1");
332 if (!list_empty(&kbdev->job_fault_event_list)) {
333 kbase_job_fault_event_dequeue(kbdev,
334 &kbdev->job_fault_event_list);
335 wake_up(&kbdev->job_fault_resume_wq);
337 dev_info(kbdev->dev, "debug job fault seq stop stage 2");
/*
 * seq_file iterator callbacks of the job fault dump; handed to seq_open()
 * by debug_job_fault_open().
 */
342 static const struct seq_operations ops = {
343 .start = debug_job_fault_start,
344 .next = debug_job_fault_next,
345 .stop = debug_job_fault_stop,
346 .show = debug_job_fault_show,
349 static int debug_job_fault_open(struct inode *in, struct file *file)
351 struct kbase_device *kbdev = in->i_private;
353 seq_open(file, &ops);
355 ((struct seq_file *)file->private_data)->private = kbdev;
356 dev_info(kbdev->dev, "debug job fault seq open");
358 kbdev->job_fault_debug = true;
/*
 * release() handler of the job fault debugfs file.
 *
 * Releases the seq_file state, clears kbdev->job_fault_debug (the flag
 * kbase_debug_job_fault_process() tests before posting a fault) and flushes
 * the events still queued through kbase_job_fault_event_cleanup().
 */
364 static int debug_job_fault_release(struct inode *in, struct file *file)
366 struct kbase_device *kbdev = in->i_private;
368 seq_release(in, file);
370 kbdev->job_fault_debug = false;
372 /* Clean the unprocessed job fault. After that, all the suspended
373 * contexts could be rescheduled.
375 kbase_job_fault_event_cleanup(kbdev);
377 dev_info(kbdev->dev, "debug job fault seq close");
/*
 * File operations of the "job_fault" debugfs entry registered by
 * kbase_debug_job_fault_dev_init().
 */
382 static const struct file_operations kbasep_debug_job_fault_fops = {
383 .open = debug_job_fault_open,
386 .release = debug_job_fault_release,
/*
 * Per-device initialisation of the job fault state: the device-wide event
 * list, the wait queue readers of the dump sleep on (job_fault_wq), the
 * wait queue resume workers sleep on (job_fault_resume_wq) and the
 * workqueue those workers run on.
 *
 * NOTE(review): no check of the alloc_workqueue() result is visible. It can
 * return NULL, and kbase_job_fault_event_post() later hands
 * job_fault_resume_workq to queue_work() - confirm the failure is handled.
 */
389 static int kbase_job_fault_event_init(struct kbase_device *kbdev)
392 INIT_LIST_HEAD(&kbdev->job_fault_event_list);
394 init_waitqueue_head(&(kbdev->job_fault_wq));
395 init_waitqueue_head(&(kbdev->job_fault_resume_wq));
397 kbdev->job_fault_resume_workq = alloc_workqueue(
398 "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
404 * Initialize debugfs entry for job fault dump
406 void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
408 debugfs_create_file("job_fault", S_IRUGO,
409 kbdev->mali_debugfs_directory, kbdev,
410 &kbasep_debug_job_fault_fops);
412 kbase_job_fault_event_init(kbdev);
413 kbdev->job_fault_debug = false;
418 * Initialize the relevant data structure per context
/*
 * Per-context initialisation of the job fault state.
 *
 * Allocates kctx->reg_dump (0x4000 * 2 bytes) and initialises the register
 * snapshot for a 0x4000 range; if the snapshot initialisation fails the
 * buffer is freed and reg_dump is reset to NULL. The context's resume event
 * list and job_fault_count are initialised as well.
 *
 * NOTE(review): the statement executed when kmalloc() fails is not among
 * the lines shown. If it returns early, the list head and job_fault_count
 * set at the end of this function stay uninitialised - confirm.
 */
420 void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
423 /* We need allocate double size register range
424 * Because this memory will keep the register address and value
426 kctx->reg_dump = kmalloc(0x4000 * 2, GFP_KERNEL);
427 if (kctx->reg_dump == NULL)
430 if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
431 kfree(kctx->reg_dump);
432 kctx->reg_dump = NULL;
434 INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
435 atomic_set(&kctx->job_fault_count, 0);
440 * release the relevant resource per context
/*
 * Frees the register dump buffer that
 * kbase_debug_job_fault_context_init() allocated for kctx.
 */
442 void kbase_debug_job_fault_context_exit(struct kbase_context *kctx)
444 kfree(kctx->reg_dump);