/*
 *
 * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */
#include <mali_kbase.h>
#include <linux/spinlock.h>

#ifdef CONFIG_DEBUG_FS

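/* Check whether any job fault event is queued on the device list. Used as
 * the wake-up condition for the dump reader waiting on job_fault_wq.
 */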
static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
{
	struct list_head *event_list = &kbdev->job_fault_event_list;
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	ret = !list_empty(event_list);
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);

	return ret;
}

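/* Check that no fault event from this context is still queued on the device
 * list. Used as the wake-up condition for the postponed job-done worker.
 */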
static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
	struct base_job_fault_event *event;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	if (list_empty(event_list)) {
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		return true;
	}
	list_for_each_entry(event, event_list, head) {
		if (event->katom->kctx == kctx) {
			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
					flags);
			return false;
		}
	}
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
	return true;
}

/* Wait until a fault happens and copy the event */
static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
		struct base_job_fault_event *event)
{
	struct list_head *event_list = &kbdev->job_fault_event_list;
	struct base_job_fault_event *event_in;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	if (list_empty(event_list)) {
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		if (wait_event_interruptible(kbdev->job_fault_wq,
				kbase_is_job_fault_event_pending(kbdev)))
			return -ERESTARTSYS;
		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	}

	event_in = list_entry(event_list->next,
			struct base_job_fault_event, head);
	event->event_code = event_in->event_code;
	event->katom = event_in->katom;

	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);

	return 0;
}

/* Remove the event from the queue */
static struct base_job_fault_event *kbase_job_fault_event_dequeue(
		struct kbase_device *kbdev, struct list_head *event_list)
{
	struct base_job_fault_event *event;

	event = list_entry(event_list->next,
			struct base_job_fault_event, head);
	list_del(event_list->next);

	return event;
}

/* Remove all the atoms that were queued after the failed atom in the same
 * context, and call the postponed bottom half of job done.
 * Then, this context can be rescheduled.
 */
static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
{
	struct list_head *event_list = &kctx->job_fault_resume_event_list;

	while (!list_empty(event_list)) {
		struct base_job_fault_event *event;

		event = kbase_job_fault_event_dequeue(kctx->kbdev,
				&kctx->job_fault_resume_event_list);
		kbase_jd_done_worker(&event->katom->work);
	}
}

/* Remove all the failed atoms that belong to different contexts.
 * Resume all the contexts that were suspended due to a failed job.
 */
static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
{
	struct list_head *event_list = &kbdev->job_fault_event_list;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	while (!list_empty(event_list)) {
		kbase_job_fault_event_dequeue(kbdev, event_list);
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		wake_up(&kbdev->job_fault_resume_wq);
		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	}
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
}

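/* Worker that holds back the bottom half of job done for a faulty atom until
 * the fault has been dumped, then reruns it and releases any atoms that were
 * queued behind it in the same context.
 */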
static void kbase_job_fault_resume_worker(struct work_struct *data)
{
	struct base_job_fault_event *event = container_of(data,
			struct base_job_fault_event, job_fault_work);
	struct kbase_context *kctx;
	struct kbase_jd_atom *katom;

	katom = event->katom;
	kctx = katom->kctx;

	dev_info(kctx->kbdev->dev, "Job dumping wait\n");

	/* When it is woken up, it needs to check whether the queue is empty
	 * or the failed atom belongs to a different context. Both cases mean
	 * the failed job has been dumped. Note that it should never happen
	 * that the job_fault_event_list holds two atoms belonging to the
	 * same context.
	 */
	wait_event(kctx->kbdev->job_fault_resume_wq,
			kbase_ctx_has_no_event_pending(kctx));

	atomic_set(&kctx->job_fault_count, 0);
	kbase_jd_done_worker(&katom->work);

	/* In case further atoms were scheduled during the failed job dump,
	 * their job_done_worker was held back. We need to rerun it after the
	 * dump has finished.
	 */
	kbase_job_fault_resume_event_cleanup(kctx);

	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
}

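/* Record the completion code in the atom's embedded fault event and append
 * it to the given event list.
 */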
static struct base_job_fault_event *kbase_job_fault_event_queue(
		struct list_head *event_list,
		struct kbase_jd_atom *atom,
		u32 completion_code)
{
	struct base_job_fault_event *event;

	event = &atom->fault_event;
	event->katom = atom;
	event->event_code = completion_code;

	list_add_tail(&event->head, event_list);

	return event;
}

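/* Queue a fault event on the device list, wake the dump reader and schedule
 * the resume worker that holds back the atom's job-done bottom half.
 */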
static void kbase_job_fault_event_post(struct kbase_device *kbdev,
		struct kbase_jd_atom *katom, u32 completion_code)
{
	struct base_job_fault_event *event;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
			katom, completion_code);
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);

	wake_up_interruptible(&kbdev->job_fault_wq);

	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);

	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
			katom->kctx->tgid, katom->kctx->id);
}

/*
 * This function will process the job fault:
 * - get the register copy,
 * - send the failed job dump event,
 * - wait on a wait queue until the job dump has finished.
 */
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
		u32 completion_code)
{
	struct kbase_context *kctx = katom->kctx;

	/* Check if dumping is in progress: only one atom of each context can
	 * be dumped at a time. If the atom belongs to a different context,
	 * it can be dumped.
	 */
	if (atomic_read(&kctx->job_fault_count) > 0) {
		kbase_job_fault_event_queue(
				&kctx->job_fault_resume_event_list,
				katom, completion_code);
		dev_info(kctx->kbdev->dev, "queue:%d\n",
				kbase_jd_atom_id(kctx, katom));
		return true;
	}

	if (kctx->kbdev->job_fault_debug == true) {

		if (completion_code != BASE_JD_EVENT_DONE) {

			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
				return false;
			}

			kbase_job_fault_event_post(kctx->kbdev, katom,
					completion_code);
			atomic_inc(&kctx->job_fault_count);
			dev_info(kctx->kbdev->dev, "post:%d\n",
					kbase_jd_atom_id(kctx, katom));
			return true;
		}
	}

	return false;
}

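/* seq_file show callback: print the register dump for the current fault
 * event, up to 50 address/value pairs per call, until the termination flag
 * is reached.
 */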
static int debug_job_fault_show(struct seq_file *m, void *v)
{
	struct kbase_device *kbdev = m->private;
	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
	struct kbase_context *kctx = event->katom->kctx;
	int i;

	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
			kctx->tgid, kctx->id, event->reg_offset);

	if (kctx->reg_dump == NULL) {
		dev_warn(kbdev->dev, "reg dump is NULL");
		return -1;
	}

	if (kctx->reg_dump[event->reg_offset] ==
			REGISTER_DUMP_TERMINATION_FLAG) {
		/* Return the error here to stop the read; the following
		 * next() will then not be called, and stop() can get the
		 * real event resource and release it.
		 */
		return -1;
	}

	if (event->reg_offset == 0)
		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);

	for (i = 0; i < 50; i++) {
		if (kctx->reg_dump[event->reg_offset] ==
				REGISTER_DUMP_TERMINATION_FLAG) {
			break;
		}
		seq_printf(m, "%08x: %08x\n",
				kctx->reg_dump[event->reg_offset],
				kctx->reg_dump[1+event->reg_offset]);
		event->reg_offset += 2;
	}

	return 0;
}

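/* seq_file next callback: reuse the same event; reg_offset tracks progress */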
static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct kbase_device *kbdev = m->private;
	struct base_job_fault_event *event = (struct base_job_fault_event *)v;

	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
			event->reg_offset, (int)*pos);

	return event;
}

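/* seq_file start callback: wait for a fault event and prepare it for dumping */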
static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
{
	struct kbase_device *kbdev = m->private;
	struct base_job_fault_event *event;

	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);

	/* The condition is tricky here: it needs to make sure that either
	 * the fault hasn't happened and the dumping hasn't been started,
	 * or that the dumping has finished.
	 */
	if (*pos == 0) {
		event = kmalloc(sizeof(*event), GFP_KERNEL);
		if (!event)
			return NULL;
		event->reg_offset = 0;
		if (kbase_job_fault_event_wait(kbdev, event)) {
			kfree(event);
			return NULL;
		}

		/* The cache flush workaround is normally called in the bottom
		 * half of job done, but we delayed it. Now we should clean
		 * the cache earlier so that the GPU memory dump is correct.
		 */
		if (event->katom->need_cache_flush_cores_retained) {
			kbase_gpu_cacheclean(kbdev, event->katom);
			event->katom->need_cache_flush_cores_retained = 0;
		}

	} else
		return NULL;

	return event;
}

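/* seq_file stop callback: free the iterator, or dequeue the dumped event and
 * wake the resume worker once the read has completed.
 */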
static void debug_job_fault_stop(struct seq_file *m, void *v)
{
	struct kbase_device *kbdev = m->private;

	/* Here we wake up the kbase_jd_done_worker after stop. The debug
	 * daemon needs to take the memory dump before the register dump;
	 * otherwise, the memory dump may be incorrect.
	 */

	if (v != NULL) {
		kfree(v);
		dev_info(kbdev->dev, "debug job fault seq stop stage 1");

	} else {
		unsigned long flags;

		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
		if (!list_empty(&kbdev->job_fault_event_list)) {
			kbase_job_fault_event_dequeue(kbdev,
					&kbdev->job_fault_event_list);
			wake_up(&kbdev->job_fault_resume_wq);
		}
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
	}
}

static const struct seq_operations ops = {
	.start = debug_job_fault_start,
	.next = debug_job_fault_next,
	.stop = debug_job_fault_stop,
	.show = debug_job_fault_show,
};

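/* Open the job_fault debugfs file: set up the seq_file iterator and enable
 * job fault dumping on the device.
 */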
static int debug_job_fault_open(struct inode *in, struct file *file)
{
	struct kbase_device *kbdev = in->i_private;

	seq_open(file, &ops);
	((struct seq_file *)file->private_data)->private = kbdev;
	dev_info(kbdev->dev, "debug job fault seq open");

	kbdev->job_fault_debug = true;

	return 0;
}

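/* Close the job_fault debugfs file: disable dumping and release any
 * unprocessed fault events so suspended contexts can be rescheduled.
 */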
static int debug_job_fault_release(struct inode *in, struct file *file)
{
	struct kbase_device *kbdev = in->i_private;

	seq_release(in, file);
	kbdev->job_fault_debug = false;

	/* Clean up the unprocessed job faults. After that, all the suspended
	 * contexts can be rescheduled.
	 */
	kbase_job_fault_event_cleanup(kbdev);

	dev_info(kbdev->dev, "debug job fault seq close");

	return 0;
}

static const struct file_operations kbasep_debug_job_fault_fops = {
	.open = debug_job_fault_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = debug_job_fault_release,
};

/*
 * Initialize debugfs entry for job fault dump
 */
void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
{
	debugfs_create_file("job_fault", S_IRUGO,
			kbdev->mali_debugfs_directory, kbdev,
			&kbasep_debug_job_fault_fops);
}

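/*
 * Initialize the per-device job fault dump resources
 */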
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
{
	INIT_LIST_HEAD(&kbdev->job_fault_event_list);

	init_waitqueue_head(&(kbdev->job_fault_wq));
	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
	spin_lock_init(&kbdev->job_fault_event_lock);

	kbdev->job_fault_resume_workq = alloc_workqueue(
			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
	if (!kbdev->job_fault_resume_workq)
		return -ENOMEM;

	kbdev->job_fault_debug = false;

	return 0;
}

/*
 * Release the relevant resource per device
 */
void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
{
	destroy_workqueue(kbdev->job_fault_resume_workq);
}

/*
 * Initialize the relevant data structure per context
 */
void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
{

	/* We need to allocate double the register range size, because this
	 * memory will keep both the register address and its value.
	 */
	kctx->reg_dump = vmalloc(0x4000 * 2);
	if (kctx->reg_dump == NULL)
		return;

	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
		vfree(kctx->reg_dump);
		kctx->reg_dump = NULL;
	}
	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
	atomic_set(&kctx->job_fault_count, 0);
}

/*
 * Release the relevant resource per context
 */
void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
{
	vfree(kctx->reg_dump);
}

#else /* CONFIG_DEBUG_FS */

int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
{
	kbdev->job_fault_debug = false;
	return 0;
}

void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
{
}

#endif /* CONFIG_DEBUG_FS */