4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #define DEBUG_SUBSYSTEM S_LOV
39 #include <linux/libcfs/libcfs.h>
41 #include <obd_class.h>
43 #include <lustre/lustre_idl.h>
45 #include "lov_internal.h"
/*
 * Put the bookkeeping fields of a request set into their initial state:
 * the completion, success and finish-checked counters are zeroed, the
 * request list is emptied, the reference count is set to one, and the
 * wait queue and spinlock are initialised.
 */
47 static void lov_init_set(struct lov_request_set *set)
50 atomic_set(&set->set_completes, 0);
51 atomic_set(&set->set_success, 0);
52 atomic_set(&set->set_finish_checked, 0);
54 INIT_LIST_HEAD(&set->set_list);
/* The reference count starts at one. */
55 atomic_set(&set->set_refcount, 1);
56 init_waitqueue_head(&set->set_waitq);
57 spin_lock_init(&set->set_lock);
/*
 * Tear down a request set.  Every request on set_list is unlinked and
 * freed together with the buffers hanging off its rq_oi: the obdo, the
 * stripe metadata (sized by rq_buflen) and, when one is attached, the
 * statfs buffer.  The page array, the lock-handle reference and finally
 * the set itself are released afterwards.
 */
60 void lov_finish_set(struct lov_request_set *set)
62 struct list_head *pos, *n;
/* The _safe iterator is required: each entry is unlinked and freed in the loop. */
65 list_for_each_safe(pos, n, &set->set_list) {
66 struct lov_request *req = list_entry(pos,
69 list_del_init(&req->rq_link);
72 OBDO_FREE(req->rq_oi.oi_oa);
74 OBD_FREE_LARGE(req->rq_oi.oi_md, req->rq_buflen);
75 if (req->rq_oi.oi_osfs)
76 OBD_FREE(req->rq_oi.oi_osfs,
77 sizeof(*req->rq_oi.oi_osfs));
78 OBD_FREE(req, sizeof(*req));
/* set_pga has set_oabufs entries; it is freed with the matching byte size. */
82 int len = set->set_oabufs * sizeof(*set->set_pga);
83 OBD_FREE_LARGE(set->set_pga, len);
86 lov_llh_put(set->set_lockh);
88 OBD_FREE(set, sizeof(*set));
/*
 * Check whether all requests of @set have completed, i.e. whether
 * set_completes has caught up with set_count.  When it has,
 * set_finish_checked is incremented atomically and the caller that
 * observes the value 1 is singled out as the first to notice completion.
 * NOTE(review): the role of @idempotent and the exact return values are
 * not evident from the statements below - confirm against the callers.
 */
92 int lov_set_finished(struct lov_request_set *set, int idempotent)
94 int completes = atomic_read(&set->set_completes);
96 CDEBUG(D_INFO, "check set %d/%d\n", completes, set->set_count);
98 if (completes == set->set_count) {
101 if (atomic_inc_return(&set->set_finish_checked) == 1)
/*
 * Account for one finished request: flag @req as complete, count it in
 * set_completes and set_success, and wake anyone sleeping on set_waitq.
 * NOTE(review): set_success is presumably only meant to grow when @rc is
 * zero (enqueue_done() compares it with set_completes) - confirm.
 */
107 void lov_update_set(struct lov_request_set *set,
108 struct lov_request *req, int rc)
110 req->rq_complete = 1;
113 atomic_inc(&set->set_completes);
115 atomic_inc(&set->set_success);
117 wake_up(&set->set_waitq);
/*
 * Common per-request completion step: update the set counters through
 * lov_update_set(), then single out failures (@rc != 0) reported for a
 * target that is either absent from lov_tgts or not active.
 * NOTE(review): such failures are presumably forgiven ("grace error");
 * the value finally returned should be confirmed.
 */
120 int lov_update_common_set(struct lov_request_set *set,
121 struct lov_request *req, int rc)
123 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
125 lov_update_set(set, req, rc);
127 /* grace error on inactive ost */
128 if (rc && !(lov->lov_tgts[req->rq_idx] &&
129 lov->lov_tgts[req->rq_idx]->ltd_active))
132 /* FIXME in raid1 regime, should return 0 */
/*
 * Queue @req at the tail of @set's request list.
 * NOTE(review): callers later test set->set_count and the completion
 * callbacks read req->rq_rqset, so both are presumably maintained here
 * as well - confirm.
 */
136 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
138 list_add_tail(&req->rq_link, &set->set_list);
/*
 * Wake-up condition used by lov_check_and_wait_active().  With lov_lock
 * held it tests whether target @idx no longer exists, is already active,
 * or has an export whose client import has attempted a connection.
 * NOTE(review): presumably evaluates to non-zero when any of these holds,
 * so that the waiter stops sleeping - confirm.
 */
143 static int lov_check_set(struct lov_obd *lov, int idx)
146 mutex_lock(&lov->lov_lock);
148 if (lov->lov_tgts[idx] == NULL ||
149 lov->lov_tgts[idx]->ltd_active ||
150 (lov->lov_tgts[idx]->ltd_exp != NULL &&
151 class_exp2cliimp(lov->lov_tgts[idx]->ltd_exp)->imp_connect_tried))
154 mutex_unlock(&lov->lov_lock);
158 /* Check if the OSC connection exists and is active.
159 * If the OSC has not yet had a chance to connect to the OST the first time,
160 * wait once for it to connect instead of returning an error. */
/*
 * @lov:     LOV device whose target table (lov_tgts) is consulted
 * @ost_idx: index of the target to examine
 *
 * The target is first examined under lov_lock.  A missing target, an
 * already active target, or one whose import has already tried to
 * connect is decided on the spot.  Otherwise the lock is dropped and the
 * caller waits on a private wait queue, with lov_check_set() as the
 * wake-up condition and obd_timeout seconds as the overall timeout.
 * Callers in this file treat a zero result as "target inactive".
 */
162 int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
164 wait_queue_head_t waitq;
165 struct l_wait_info lwi;
166 struct lov_tgt_desc *tgt;
169 mutex_lock(&lov->lov_lock);
171 tgt = lov->lov_tgts[ost_idx];
173 if (unlikely(tgt == NULL))
176 if (likely(tgt->ltd_active))
179 if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried)
182 mutex_unlock(&lov->lov_lock);
/*
 * Nothing in the code shown here signals this on-stack queue; the second
 * LWI_TIMEOUT_INTERVAL argument (one second) presumably makes the wait
 * re-evaluate its condition periodically - TODO confirm.
 */
184 init_waitqueue_head(&waitq);
185 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(obd_timeout),
186 cfs_time_seconds(1), NULL, NULL);
188 rc = l_wait_event(waitq, lov_check_set(lov, ost_idx), &lwi);
/*
 * NOTE(review): tgt was read under lov_lock, which has been released by
 * now; confirm the descriptor cannot go away while we slept.  The NULL
 * test also looks redundant, since tgt was already checked above.
 */
189 if (tgt != NULL && tgt->ltd_active)
195 mutex_unlock(&lov->lov_lock);
/*
 * Defined outside this file (the osc_ prefix suggests the OSC module).
 * lov_update_enqueue_set() calls it with a stripe's lock handle, its
 * lov_oinfo, the enqueue flags, an LVB, the lock mode and the result code.
 */
199 extern void osc_update_enqueue(struct lustre_handle *lov_lockhp,
200 struct lov_oinfo *loi, int flags,
201 struct ost_lvb *lvb, __u32 mode, int rc);
/*
 * LOV-level reaction to the outcome of one stripe's enqueue.
 *
 * A result counts as a failure when it is not ELDLM_OK and is not an
 * ELDLM_LOCK_ABORTED reply to an intent request.  On failure the stripe's
 * lock handle is cleared; if the target at @idx exists and is active the
 * error is also logged, except for -EINTR and -EUSERS.
 * NOTE(review): the value returned for each case is not spelled out in
 * the statements below - confirm (a failure on an inactive target is
 * presumably turned into success).
 */
203 static int lov_update_enqueue_lov(struct obd_export *exp,
204 struct lustre_handle *lov_lockhp,
205 struct lov_oinfo *loi, int flags, int idx,
206 struct ost_id *oi, int rc)
208 struct lov_obd *lov = &exp->exp_obd->u.lov;
210 if (rc != ELDLM_OK &&
211 !(rc == ELDLM_LOCK_ABORTED && (flags & LDLM_FL_HAS_INTENT))) {
/* Failed enqueue: wipe the stripe handle so that it reads as unused. */
212 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
213 if (lov->lov_tgts[idx] && lov->lov_tgts[idx]->ltd_active) {
214 /* -EUSERS used by OST to report file contention */
215 if (rc != -EINTR && rc != -EUSERS)
216 CERROR("%s: enqueue objid "DOSTID" subobj"
217 DOSTID" on OST idx %d: rc %d\n",
218 exp->exp_obd->obd_name,
219 POSTID(oi), POSTID(&loi->loi_oi),
220 loi->loi_ost_idx, rc);
/*
 * Completion handler for one stripe of a striped enqueue.
 *
 * @req:  the per-stripe request that finished
 * @mode: lock mode, passed through to osc_update_enqueue()
 * @rc:   result reported for this stripe
 *
 * Locates the stripe's slot in the set's lock-handle array and its
 * lov_oinfo in the parent stripe metadata, then, under the stripe lock,
 * hands the result to osc_update_enqueue() and lov_update_enqueue_lov().
 * The rc produced by the latter is recorded with lov_update_set().
 */
227 int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
229 struct lov_request_set *set = req->rq_rqset;
230 struct lustre_handle *lov_lockhp;
231 struct obd_info *oi = set->set_oi;
232 struct lov_oinfo *loi;
236 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
237 loi = oi->oi_md->lsm_oinfo[req->rq_stripe];
239 /* XXX LOV STACKING: OSC gets a copy, created in lov_prep_enqueue_set
240 * and that copy can be arbitrarily out of date.
242 * The LOV API is due for a serious rewriting anyways, and this
243 * can be addressed then. */
245 lov_stripe_lock(oi->oi_md);
/* The LVB passed on is the one inside the request's own md copy. */
246 osc_update_enqueue(lov_lockhp, loi, oi->oi_flags,
247 &req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb, mode, rc);
/* For an aborted intent enqueue the stripe's handle is zeroed. */
248 if (rc == ELDLM_LOCK_ABORTED && (oi->oi_flags & LDLM_FL_HAS_INTENT))
249 memset(lov_lockhp, 0, sizeof *lov_lockhp);
250 rc = lov_update_enqueue_lov(set->set_exp, lov_lockhp, loi, oi->oi_flags,
251 req->rq_idx, &oi->oi_md->lsm_oi, rc);
252 lov_stripe_unlock(oi->oi_md);
253 lov_update_set(set, req, rc);
257 /* The callback for osc_enqueue that updates lov info for every OSC request.
 *
 * @cookie is the obd_info embedded in a lov_request as rq_oi; the owning
 * request is recovered with container_of().  The result @rc is forwarded
 * to lov_update_enqueue_set() together with the lock mode stored in the
 * set's enqueue info (set_ei->ei_mode). */
258 static int cb_update_enqueue(void *cookie, int rc)
260 struct obd_info *oinfo = cookie;
261 struct ldlm_enqueue_info *einfo;
262 struct lov_request *lovreq;
264 lovreq = container_of(oinfo, struct lov_request, rq_oi);
265 einfo = lovreq->rq_rqset->set_ei;
266 return lov_update_enqueue_set(lovreq, einfo->ei_mode, rc);
/*
 * Final step of an enqueue or match.  If at least one request completed
 * and the number of completions equals the number of successes, nothing
 * has to be undone.  Otherwise the per-stripe locks are cancelled again
 * with obd_cancel() on the owning target's export; cancel failures on an
 * existing, active target are logged.  The reference on the lock-handle
 * container is dropped at the end.
 */
269 static int enqueue_done(struct lov_request_set *set, __u32 mode)
271 struct lov_request *req;
272 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
273 int completes = atomic_read(&set->set_completes);
276 /* enqueue/match success, just return */
277 if (completes && completes == atomic_read(&set->set_success))
280 /* cancel enqueued/matched locks */
281 list_for_each_entry(req, &set->set_list, rq_link) {
282 struct lustre_handle *lov_lockhp;
/* Requests that did not complete, or completed with an error, are filtered out. */
284 if (!req->rq_complete || req->rq_rc)
287 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
289 if (!lustre_handle_is_used(lov_lockhp))
/*
 * NOTE(review): lov_tgts[req->rq_idx] is dereferenced in this call, yet
 * the error check right below tests the same pointer for NULL - confirm
 * whether it can be NULL at this point.
 */
292 rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
293 req->rq_oi.oi_md, mode, lov_lockhp);
294 if (rc && lov->lov_tgts[req->rq_idx] &&
295 lov->lov_tgts[req->rq_idx]->ltd_active)
296 CERROR("%s: cancelling obdjid "DOSTID" on OST"
297 "idx %d error: rc = %d\n",
298 set->set_exp->exp_obd->obd_name,
299 POSTID(&req->rq_oi.oi_md->lsm_oi),
303 lov_llh_put(set->set_lockh);
/*
 * Finish an enqueue request set.
 *
 * @set:   the request set; must have set_exp assigned
 * @mode:  lock mode handed to enqueue_done()
 * @rc:    result so far; when non-zero it is what gets returned
 * @rqset: ptlrpc request set of the caller
 *
 * On one path set_completes is reset to zero and enqueue_done() is run;
 * on the other the reference on the lock-handle container, if there is
 * one, is dropped.  The return value is @rc if non-zero, otherwise the
 * result of enqueue_done().
 * NOTE(review): the condition selecting the first path is not visible
 * below (the in-body comment mentions sync requests) - confirm.
 */
307 int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc,
308 struct ptlrpc_request_set *rqset)
314 LASSERT(set->set_exp);
315 /* Do enqueue_done only for sync requests and if any request
319 atomic_set(&set->set_completes, 0);
320 ret = enqueue_done(set, mode);
321 } else if (set->set_lockh)
322 lov_llh_put(set->set_lockh);
326 RETURN(rc ? rc : ret);
/*
 * hop_addref hook of lov_handle_ops: takes one more reference on the
 * lov_lock_handles passed as @llhp and logs the resulting count.
 */
329 static void lov_llh_addref(void *llhp)
331 struct lov_lock_handles *llh = llhp;
333 atomic_inc(&llh->llh_refcount);
334 CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh,
335 atomic_read(&llh->llh_refcount));
/* Handle operations passed to class_handle_hash() in lov_llh_new(). */
338 static struct portals_handle_ops lov_handle_ops = {
339 .hop_addref = lov_llh_addref,
/*
 * Allocate a lock-handle container with room for one llh_handles entry
 * per stripe of @lsm, record the stripe count and register the container
 * with class_handle_hash().
 * NOTE(review): the refcount starts at 2 - presumably one reference for
 * the handle hash and one for the caller; confirm.
 */
343 static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
345 struct lov_lock_handles *llh;
/* Header plus a trailing array of lsm_stripe_count handles. */
347 OBD_ALLOC(llh, sizeof *llh +
348 sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
352 atomic_set(&llh->llh_refcount, 2);
353 llh->llh_stripe_count = lsm->lsm_stripe_count;
354 INIT_LIST_HEAD(&llh->llh_handle.h_link);
355 class_handle_hash(&llh->llh_handle, &lov_handle_ops);
/*
 * Build the request set for a striped lock enqueue.
 *
 * @exp:    LOV export
 * @oinfo:  enqueue description: oi_md is the striped object, oi_policy
 *          the extent, oi_lockh receives the cookie of the new
 *          lock-handle container
 * @einfo:  enqueue info; its ei_mode is passed to lov_fini_enqueue_set()
 * @reqset: output pointer for the new set
 *
 * The loop walks all stripes and tests each against the requested extent
 * with lov_stripe_intersects().  A request carries its own single-stripe
 * md (header, one lsm_oinfo pointer and one lov_oinfo in one buffer), the
 * extent bounds start/end, a pointer to its slot in the lock-handle array
 * and cb_update_enqueue as completion callback.  Allocation failures
 * yield -ENOMEM; -EIO is another possible result (presumably when no
 * request could be queued - confirm).
 */
360 int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
361 struct ldlm_enqueue_info *einfo,
362 struct lov_request_set **reqset)
364 struct lov_obd *lov = &exp->exp_obd->u.lov;
365 struct lov_request_set *set;
368 OBD_ALLOC(set, sizeof(*set));
376 set->set_lockh = lov_llh_new(oinfo->oi_md);
377 if (set->set_lockh == NULL)
378 GOTO(out_set, rc = -ENOMEM);
/* The caller's lock handle gets the cookie of the container. */
379 oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie;
381 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
382 struct lov_oinfo *loi;
383 struct lov_request *req;
386 loi = oinfo->oi_md->lsm_oinfo[i];
387 if (!lov_stripe_intersects(oinfo->oi_md, i,
388 oinfo->oi_policy.l_extent.start,
389 oinfo->oi_policy.l_extent.end,
393 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
394 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
398 OBD_ALLOC(req, sizeof(*req));
400 GOTO(out_set, rc = -ENOMEM);
/* One buffer: md header + one lsm_oinfo[] pointer + the lov_oinfo itself. */
402 req->rq_buflen = sizeof(*req->rq_oi.oi_md) +
403 sizeof(struct lov_oinfo *) +
404 sizeof(struct lov_oinfo);
405 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
406 if (req->rq_oi.oi_md == NULL) {
407 OBD_FREE(req, sizeof(*req));
408 GOTO(out_set, rc = -ENOMEM);
/* lsm_oinfo[0] points at the lov_oinfo placed right behind the pointer slot. */
410 req->rq_oi.oi_md->lsm_oinfo[0] =
411 ((void *)req->rq_oi.oi_md) + sizeof(*req->rq_oi.oi_md) +
412 sizeof(struct lov_oinfo *);
414 /* Set lov request specific parameters. */
415 req->rq_oi.oi_lockh = set->set_lockh->llh_handles + i;
416 req->rq_oi.oi_cb_up = cb_update_enqueue;
417 req->rq_oi.oi_flags = oinfo->oi_flags;
419 LASSERT(req->rq_oi.oi_lockh);
421 req->rq_oi.oi_policy.l_extent.gid =
422 oinfo->oi_policy.l_extent.gid;
423 req->rq_oi.oi_policy.l_extent.start = start;
424 req->rq_oi.oi_policy.l_extent.end = end;
426 req->rq_idx = loi->loi_ost_idx;
429 /* XXX LOV STACKING: submd should be from the subobj */
430 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
431 req->rq_oi.oi_md->lsm_stripe_count = 0;
432 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid =
434 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms = loi->loi_kms;
435 req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb = loi->loi_lvb;
437 lov_set_add_req(req, set);
440 GOTO(out_set, rc = -EIO);
/* Tear-down of the set goes through the regular fini routine. */
444 lov_fini_enqueue_set(set, einfo->ei_mode, rc, NULL);
/*
 * Finish a match request set: run enqueue_done(), and when every request
 * in the set succeeded and the caller only tested for a lock
 * (LDLM_FL_TEST_LOCK in @flags), drop a reference on the lock-handle
 * container as well.
 */
448 int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
454 LASSERT(set->set_exp);
455 rc = enqueue_done(set, mode);
456 if ((set->set_count == atomic_read(&set->set_success)) &&
457 (flags & LDLM_FL_TEST_LOCK))
458 lov_llh_put(set->set_lockh);
/*
 * Build the request set for a striped lock match.
 *
 * @exp:    LOV export
 * @oinfo:  caller's obd_info
 * @lsm:    striped object to match against
 * @policy: extent (and gid) to match
 * @mode:   lock mode, passed to lov_fini_match_set()
 * @lockh:  receives the cookie of the new lock-handle container
 * @reqset: output pointer for the new set
 *
 * Each stripe is tested against the extent with lov_stripe_intersects(),
 * which also yields the start/end stored in the request.  A request gets
 * its own md buffer of sizeof(struct lov_stripe_md) carrying the stripe
 * object's id.  An inactive target fails the whole operation with -EIO;
 * allocation failures yield -ENOMEM.
 */
465 int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
466 struct lov_stripe_md *lsm, ldlm_policy_data_t *policy,
467 __u32 mode, struct lustre_handle *lockh,
468 struct lov_request_set **reqset)
470 struct lov_obd *lov = &exp->exp_obd->u.lov;
471 struct lov_request_set *set;
474 OBD_ALLOC(set, sizeof(*set));
481 set->set_oi->oi_md = lsm;
482 set->set_lockh = lov_llh_new(lsm);
483 if (set->set_lockh == NULL)
484 GOTO(out_set, rc = -ENOMEM);
/* The caller's lock handle gets the cookie of the container. */
485 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
487 for (i = 0; i < lsm->lsm_stripe_count; i++){
488 struct lov_oinfo *loi;
489 struct lov_request *req;
492 loi = lsm->lsm_oinfo[i];
493 if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
494 policy->l_extent.end, &start, &end))
497 /* FIXME raid1 should grace this error */
498 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
499 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
500 GOTO(out_set, rc = -EIO);
503 OBD_ALLOC(req, sizeof(*req));
505 GOTO(out_set, rc = -ENOMEM);
507 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
508 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
509 if (req->rq_oi.oi_md == NULL) {
510 OBD_FREE(req, sizeof(*req));
511 GOTO(out_set, rc = -ENOMEM);
514 req->rq_oi.oi_policy.l_extent.start = start;
515 req->rq_oi.oi_policy.l_extent.end = end;
516 req->rq_oi.oi_policy.l_extent.gid = policy->l_extent.gid;
518 req->rq_idx = loi->loi_ost_idx;
521 /* XXX LOV STACKING: submd should be from the subobj */
522 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
523 req->rq_oi.oi_md->lsm_stripe_count = 0;
525 lov_set_add_req(req, set);
528 GOTO(out_set, rc = -EIO);
532 lov_fini_match_set(set, mode, 0);
/*
 * Finish a cancel request set.  The set must have set_exp assigned; a
 * reference on the lock-handle container is dropped here.
 */
536 int lov_fini_cancel_set(struct lov_request_set *set)
543 LASSERT(set->set_exp);
545 lov_llh_put(set->set_lockh);
/*
 * Build the request set for cancelling a striped lock.
 *
 * @exp:    LOV export
 * @oinfo:  caller's obd_info
 * @lsm:    striped object the lock belongs to
 * @mode:   lock mode
 * @lockh:  LOV lock handle identifying the lock-handle container
 * @reqset: output pointer for the new set
 *
 * The container is looked up from @lockh; an unknown handle is logged
 * and fails with -EINVAL.  The loop then inspects every stripe's slot in
 * the container; slots that are not in use are logged as having no lock.
 * A request gets its own md buffer carrying the stripe object's id.
 * Allocation failures yield -ENOMEM.
 */
552 int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
553 struct lov_stripe_md *lsm, __u32 mode,
554 struct lustre_handle *lockh,
555 struct lov_request_set **reqset)
557 struct lov_request_set *set;
560 OBD_ALLOC(set, sizeof(*set));
567 set->set_oi->oi_md = lsm;
568 set->set_lockh = lov_handle2llh(lockh);
569 if (set->set_lockh == NULL) {
570 CERROR("LOV: invalid lov lock handle %p\n", lockh);
571 GOTO(out_set, rc = -EINVAL);
573 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
575 for (i = 0; i < lsm->lsm_stripe_count; i++){
576 struct lov_request *req;
577 struct lustre_handle *lov_lockhp;
578 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
580 lov_lockhp = set->set_lockh->llh_handles + i;
581 if (!lustre_handle_is_used(lov_lockhp)) {
582 CDEBUG(D_INFO, "lov idx %d subobj "DOSTID" no lock\n",
583 loi->loi_ost_idx, POSTID(&loi->loi_oi));
587 OBD_ALLOC(req, sizeof(*req));
589 GOTO(out_set, rc = -ENOMEM);
591 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
592 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
593 if (req->rq_oi.oi_md == NULL) {
594 OBD_FREE(req, sizeof(*req));
595 GOTO(out_set, rc = -ENOMEM);
598 req->rq_idx = loi->loi_ost_idx;
601 /* XXX LOV STACKING: submd should be from the subobj */
602 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
603 req->rq_oi.oi_md->lsm_stripe_count = 0;
605 lov_set_add_req(req, set);
608 GOTO(out_set, rc = -EIO);
612 lov_fini_cancel_set(set);
/*
 * Merge the attributes returned by the individual stripe requests into
 * the caller's obdo (set->set_oi->oi_oa).
 *
 * The attributes of requests that completed without error and carry a
 * non-zero o_valid are merged into a temporary obdo with
 * lov_merge_attrs().  The temporary obdo then takes over the caller's
 * object id and is copied over the caller's obdo.  When the caller asked
 * for OBD_MD_FLEPOCH, attrset must equal the stripe count or -EIO
 * results.  A failed allocation yields -ENOMEM.
 * NOTE(review): attrset is presumably the number of stripes that
 * contributed attributes - confirm in lov_merge_attrs().
 */
615 static int common_attr_done(struct lov_request_set *set)
617 struct list_head *pos;
618 struct lov_request *req;
620 int rc = 0, attrset = 0;
622 LASSERT(set->set_oi != NULL);
624 if (set->set_oi->oi_oa == NULL)
627 if (!atomic_read(&set->set_success))
632 GOTO(out, rc = -ENOMEM);
634 list_for_each (pos, &set->set_list) {
635 req = list_entry(pos, struct lov_request, rq_link);
637 if (!req->rq_complete || req->rq_rc)
639 if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
641 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
642 req->rq_oi.oi_oa->o_valid,
643 set->set_oi->oi_md, req->rq_stripe, &attrset);
646 CERROR("No stripes had valid attrs\n");
649 if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
650 (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
651 /* When we take attributes of some epoch, we require all the
652 * ost to be active. */
653 CERROR("Not all the stripes had valid attrs\n");
654 GOTO(out, rc = -EIO);
/* Keep the caller's object id while replacing everything else. */
657 tmp_oa->o_oi = set->set_oi->oi_oa->o_oi;
658 memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
/*
 * Post-process a finished bulk read/write set: for requests that
 * completed without error and whose obdo reports a block count
 * (OBD_MD_FLBLOCKS), copy that count into the LVB of the corresponding
 * stripe in the parent stripe metadata.
 */
666 static int brw_done(struct lov_request_set *set)
668 struct lov_stripe_md *lsm = set->set_oi->oi_md;
669 struct lov_oinfo *loi = NULL;
670 struct list_head *pos;
671 struct lov_request *req;
673 list_for_each (pos, &set->set_list) {
674 req = list_entry(pos, struct lov_request, rq_link);
676 if (!req->rq_complete || req->rq_rc)
679 loi = lsm->lsm_oinfo[req->rq_stripe];
681 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS)
682 loi->loi_lvb.lvb_blocks = req->rq_oi.oi_oa->o_blocks;
/*
 * Finish a bulk read/write request set.  The set must have set_exp
 * assigned; post-processing is only attempted when at least one request
 * has completed.
 */
688 int lov_fini_brw_set(struct lov_request_set *set)
694 LASSERT(set->set_exp);
695 if (atomic_read(&set->set_completes)) {
697 /* FIXME update qos data here */
/*
 * Build the request set for a striped bulk read/write.
 *
 * @exp:     LOV export
 * @oinfo:   I/O description; oi_md is the striped object, oi_oa the
 *           template obdo, oi_capa the capability handed to each request
 * @oa_bufs: number of entries in @pga
 * @pga:     pages of the I/O, with file-level offsets
 * @oti:     transaction info
 * @reqset:  output pointer for the new set
 *
 * Works in three passes:
 *  1. count, per stripe, how many pages fall onto it;
 *  2. create one request per stripe that received pages, giving it a
 *     copy of the template obdo, the stripe object's id and a contiguous
 *     window (rq_pgaidx, rq_oabufs) in the set's own page array;
 *  3. copy every page into the window of its stripe and translate its
 *     offset with lov_stripe_offset().
 * An inactive target yields -EIO, allocation failures -ENOMEM.
 */
704 int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
705 obd_count oa_bufs, struct brw_page *pga,
706 struct obd_trans_info *oti,
707 struct lov_request_set **reqset)
714 struct lov_request_set *set;
715 struct lov_obd *lov = &exp->exp_obd->u.lov;
716 int rc = 0, i, shift;
718 OBD_ALLOC(set, sizeof(*set));
726 set->set_oabufs = oa_bufs;
727 OBD_ALLOC_LARGE(set->set_pga, oa_bufs * sizeof(*set->set_pga));
729 GOTO(out, rc = -ENOMEM);
/* Scratch array with one entry per stripe. */
731 OBD_ALLOC_LARGE(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
733 GOTO(out, rc = -ENOMEM);
735 /* calculate the page count for each stripe */
736 for (i = 0; i < oa_bufs; i++) {
737 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
738 info[stripe].count++;
741 /* alloc and initialize lov request */
743 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++){
744 struct lov_oinfo *loi = NULL;
745 struct lov_request *req;
747 if (info[i].count == 0)
750 loi = oinfo->oi_md->lsm_oinfo[i];
751 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
752 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
753 GOTO(out, rc = -EIO);
756 OBD_ALLOC(req, sizeof(*req));
758 GOTO(out, rc = -ENOMEM);
760 OBDO_ALLOC(req->rq_oi.oi_oa);
761 if (req->rq_oi.oi_oa == NULL) {
762 OBD_FREE(req, sizeof(*req));
763 GOTO(out, rc = -ENOMEM);
767 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
768 sizeof(*req->rq_oi.oi_oa));
770 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
771 req->rq_oi.oi_oa->o_stripe_idx = i;
773 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
774 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
775 if (req->rq_oi.oi_md == NULL) {
776 OBDO_FREE(req->rq_oi.oi_oa);
777 OBD_FREE(req, sizeof(*req));
778 GOTO(out, rc = -ENOMEM);
781 req->rq_idx = loi->loi_ost_idx;
784 /* XXX LOV STACKING */
785 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
/* This request owns set_pga[shift .. shift + count - 1]. */
786 req->rq_oabufs = info[i].count;
787 req->rq_pgaidx = shift;
788 shift += req->rq_oabufs;
790 /* remember the index for sort brw_page array */
791 info[i].index = req->rq_pgaidx;
793 req->rq_oi.oi_capa = oinfo->oi_capa;
795 lov_set_add_req(req, set);
798 GOTO(out, rc = -EIO);
800 /* rotate & sort the brw_page array */
801 for (i = 0; i < oa_bufs; i++) {
802 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
/*
 * Destination slot: start of the stripe's window plus info[stripe].off,
 * presumably the number of pages already placed there - confirm.
 */
804 shift = info[stripe].index + info[stripe].off;
805 LASSERT(shift < oa_bufs);
806 set->set_pga[shift] = pga[i];
807 lov_stripe_offset(oinfo->oi_md, pga[i].off, stripe,
808 &set->set_pga[shift].off);
814 sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
819 lov_fini_brw_set(set);
/*
 * Finish a getattr request set.  When at least one request completed,
 * the per-stripe attributes are merged into the caller's obdo by
 * common_attr_done(), whose result becomes rc.
 */
824 int lov_fini_getattr_set(struct lov_request_set *set)
830 LASSERT(set->set_exp);
831 if (atomic_read(&set->set_completes))
832 rc = common_attr_done(set);
839 /* The callback for osc_getattr_async that finalizes a request info when a
840 * response is received.
 *
 * @cookie is the obd_info embedded in a lov_request as rq_oi; the owning
 * request is recovered with container_of() and the result @rc is passed
 * on to lov_update_common_set(). */
841 static int cb_getattr_update(void *cookie, int rc)
843 struct obd_info *oinfo = cookie;
844 struct lov_request *lovreq;
845 lovreq = container_of(oinfo, struct lov_request, rq_oi);
846 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped getattr.
 *
 * @exp:    LOV export
 * @oinfo:  request description; oi_md is the striped object, oi_oa the
 *          template obdo, oi_capa the capability handed to each request
 * @reqset: output pointer for the new set
 *
 * A request receives a copy of the template obdo with the stripe
 * object's id and uses cb_getattr_update as completion callback.  If a
 * target is inactive while the caller asked for OBD_MD_FLEPOCH, the
 * whole operation fails with -EIO.  Allocation failures yield -ENOMEM.
 */
849 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
850 struct lov_request_set **reqset)
852 struct lov_request_set *set;
853 struct lov_obd *lov = &exp->exp_obd->u.lov;
856 OBD_ALLOC(set, sizeof(*set));
864 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
865 struct lov_oinfo *loi;
866 struct lov_request *req;
868 loi = oinfo->oi_md->lsm_oinfo[i];
869 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
870 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
871 if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH)
872 /* SOM requires all the OSTs to be active. */
873 GOTO(out_set, rc = -EIO);
877 OBD_ALLOC(req, sizeof(*req));
879 GOTO(out_set, rc = -ENOMEM);
882 req->rq_idx = loi->loi_ost_idx;
884 OBDO_ALLOC(req->rq_oi.oi_oa);
885 if (req->rq_oi.oi_oa == NULL) {
886 OBD_FREE(req, sizeof(*req));
887 GOTO(out_set, rc = -ENOMEM);
/* Start from the caller's obdo, then retarget it at the stripe object. */
889 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
890 sizeof(*req->rq_oi.oi_oa));
891 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
892 req->rq_oi.oi_cb_up = cb_getattr_update;
893 req->rq_oi.oi_capa = oinfo->oi_capa;
895 lov_set_add_req(req, set);
898 GOTO(out_set, rc = -EIO);
902 lov_fini_getattr_set(set);
/*
 * Finish a destroy request set.  The set must have set_exp assigned;
 * the only post-processing hook, for sets with at least one completed
 * request, is still a FIXME placeholder.
 */
906 int lov_fini_destroy_set(struct lov_request_set *set)
910 LASSERT(set->set_exp);
911 if (atomic_read(&set->set_completes)) {
912 /* FIXME update qos data here */
/*
 * Build the request set for destroying a striped object.
 *
 * @exp:    LOV export
 * @oinfo:  caller's obd_info
 * @src_oa: template obdo, copied into every request
 * @lsm:    striped object to destroy
 * @oti:    transaction info; its log cookies are remembered in the set
 *          when @src_oa has OBD_MD_FLCOOKIE set
 * @reqset: output pointer for the new set
 *
 * A request receives a copy of @src_oa carrying the stripe object's id.
 * Allocation failures yield -ENOMEM; -EIO is another possible result.
 */
920 int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
921 struct obdo *src_oa, struct lov_stripe_md *lsm,
922 struct obd_trans_info *oti,
923 struct lov_request_set **reqset)
925 struct lov_request_set *set;
926 struct lov_obd *lov = &exp->exp_obd->u.lov;
929 OBD_ALLOC(set, sizeof(*set));
936 set->set_oi->oi_md = lsm;
937 set->set_oi->oi_oa = src_oa;
939 if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
940 set->set_cookies = oti->oti_logcookies;
942 for (i = 0; i < lsm->lsm_stripe_count; i++) {
943 struct lov_oinfo *loi;
944 struct lov_request *req;
946 loi = lsm->lsm_oinfo[i];
947 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
948 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
952 OBD_ALLOC(req, sizeof(*req));
954 GOTO(out_set, rc = -ENOMEM);
957 req->rq_idx = loi->loi_ost_idx;
959 OBDO_ALLOC(req->rq_oi.oi_oa);
960 if (req->rq_oi.oi_oa == NULL) {
961 OBD_FREE(req, sizeof(*req));
962 GOTO(out_set, rc = -ENOMEM);
964 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
965 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
966 lov_set_add_req(req, set);
969 GOTO(out_set, rc = -EIO);
973 lov_fini_destroy_set(set);
/*
 * Finish a setattr request set.  When at least one request completed,
 * the per-stripe attributes are merged into the caller's obdo by
 * common_attr_done(), whose result becomes rc.
 */
977 int lov_fini_setattr_set(struct lov_request_set *set)
983 LASSERT(set->set_exp);
984 if (atomic_read(&set->set_completes)) {
985 rc = common_attr_done(set);
986 /* FIXME update qos data here */
/*
 * Completion handler for one stripe of a striped setattr.
 *
 * Updates the set counters through lov_update_set() and singles out
 * failures reported for a target that is absent or not active.  The
 * ctime, mtime and atime that the reply marks as valid are copied into
 * the LVB of the corresponding stripe in the parent stripe metadata.
 * NOTE(review): the timestamp copy is presumably limited to successful
 * replies, and the return value is not visible below - confirm.
 */
993 int lov_update_setattr_set(struct lov_request_set *set,
994 struct lov_request *req, int rc)
996 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
997 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
999 lov_update_set(set, req, rc);
1001 /* grace error on inactive ost */
1002 if (rc && !(lov->lov_tgts[req->rq_idx] &&
1003 lov->lov_tgts[req->rq_idx]->ltd_active))
/* Mirror the timestamps flagged valid in the reply into the stripe's LVB. */
1007 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
1008 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
1009 req->rq_oi.oi_oa->o_ctime;
1010 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
1011 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
1012 req->rq_oi.oi_oa->o_mtime;
1013 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
1014 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
1015 req->rq_oi.oi_oa->o_atime;
1021 /* The callback for osc_setattr_async that finalizes a request info when a
1022 * response is received.
 *
 * @cookie is the obd_info embedded in a lov_request as rq_oi; the owning
 * request is recovered with container_of() and the result @rc is passed
 * on to lov_update_setattr_set(). */
1023 static int cb_setattr_update(void *cookie, int rc)
1025 struct obd_info *oinfo = cookie;
1026 struct lov_request *lovreq;
1027 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1028 return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped setattr.
 *
 * @exp:    LOV export
 * @oinfo:  request description; oi_md is the striped object, oi_oa the
 *          template obdo, oi_capa the capability handed to each request
 * @oti:    transaction info; its log cookies are remembered in the set
 *          when the template obdo has OBD_MD_FLCOOKIE set
 * @reqset: output pointer for the new set
 *
 * A request receives a copy of the template obdo with the stripe
 * object's id and stripe index, and uses cb_setattr_update as completion
 * callback.  When a size is being set, it is converted to a per-stripe
 * size.  Allocation failures yield -ENOMEM; an empty set yields -EIO.
 */
1031 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
1032 struct obd_trans_info *oti,
1033 struct lov_request_set **reqset)
1035 struct lov_request_set *set;
1036 struct lov_obd *lov = &exp->exp_obd->u.lov;
1039 OBD_ALLOC(set, sizeof(*set));
1046 set->set_oi = oinfo;
1047 if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
1048 set->set_cookies = oti->oti_logcookies;
1050 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1051 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1052 struct lov_request *req;
1054 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
1055 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1059 OBD_ALLOC(req, sizeof(*req));
1061 GOTO(out_set, rc = -ENOMEM);
1063 req->rq_idx = loi->loi_ost_idx;
1065 OBDO_ALLOC(req->rq_oi.oi_oa);
1066 if (req->rq_oi.oi_oa == NULL) {
1067 OBD_FREE(req, sizeof(*req));
1068 GOTO(out_set, rc = -ENOMEM);
1070 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1071 sizeof(*req->rq_oi.oi_oa));
1072 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1073 req->rq_oi.oi_oa->o_stripe_idx = i;
1074 req->rq_oi.oi_cb_up = cb_setattr_update;
1075 req->rq_oi.oi_capa = oinfo->oi_capa;
/*
 * lov_stripe_offset() writes the stripe-local value for the file size
 * into the request's obdo; on a negative return with a non-zero result
 * that value is reduced by one.
 */
1077 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
1078 int off = lov_stripe_offset(oinfo->oi_md,
1079 oinfo->oi_oa->o_size, i,
1080 &req->rq_oi.oi_oa->o_size);
1082 if (off < 0 && req->rq_oi.oi_oa->o_size)
1083 req->rq_oi.oi_oa->o_size--;
1085 CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
1086 i, req->rq_oi.oi_oa->o_size,
1087 oinfo->oi_oa->o_size);
1089 lov_set_add_req(req, set);
/* No request could be queued at all. */
1091 if (!set->set_count)
1092 GOTO(out_set, rc = -EIO);
1096 lov_fini_setattr_set(set);
/*
 * Finish a punch request set.  When at least one request completed and
 * at least one succeeded, the per-stripe attributes are merged into the
 * caller's obdo by common_attr_done().  The caller's reference on the
 * set is then dropped with lov_put_reqset().
 */
1100 int lov_fini_punch_set(struct lov_request_set *set)
1106 LASSERT(set->set_exp);
1107 if (atomic_read(&set->set_completes)) {
1109 /* FIXME update qos data here */
1110 if (atomic_read(&set->set_success))
1111 rc = common_attr_done(set);
1114 lov_put_reqset(set);
/*
 * Completion handler for one stripe of a striped punch.
 *
 * Updates the set counters through lov_update_set() and singles out
 * failures reported for a target that is not active.  Under the stripe
 * lock, a block count flagged valid in the reply (OBD_MD_FLBLOCKS) is
 * copied into the LVB of the corresponding stripe.
 * NOTE(review): the LVB update is presumably limited to successful
 * replies, and the return value is not visible below - confirm.
 */
1119 int lov_update_punch_set(struct lov_request_set *set,
1120 struct lov_request *req, int rc)
1122 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1123 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1125 lov_update_set(set, req, rc);
1127 /* grace error on inactive ost */
/*
 * NOTE(review): lov_update_common_set() and lov_update_setattr_set()
 * test lov_tgts[req->rq_idx] for NULL before reading ltd_active; this
 * check does not - confirm the target cannot be NULL here.
 */
1128 if (rc && !lov->lov_tgts[req->rq_idx]->ltd_active)
1132 lov_stripe_lock(lsm);
1133 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS) {
1134 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_blocks =
1135 req->rq_oi.oi_oa->o_blocks;
1138 lov_stripe_unlock(lsm);
1144 /* The callback for osc_punch that finalizes a request info when a response
 * is received. */
/*
 * @cookie: obd_info embedded in a lov_request as its rq_oi member
 * @rc:     result reported for that request
 *
 * Recovers the owning lov_request with container_of() and forwards the
 * result to lov_update_punch_set().
 */
1146 static int cb_update_punch(void *cookie, int rc)
1148 struct obd_info *oinfo = cookie;
1149 struct lov_request *lovreq;
1150 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1151 return lov_update_punch_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped punch.
 *
 * @exp:    LOV export
 * @oinfo:  request description; oi_md is the striped object, oi_policy
 *          the extent, oi_oa the template obdo, oi_capa the capability
 *          handed to each request
 * @oti:    transaction info
 * @reqset: output pointer for the new set
 *
 * Each stripe is tested against the extent with lov_stripe_intersects().
 * A request receives a copy of the template obdo with the stripe
 * object's id and index and OBD_MD_FLGROUP added to o_valid, the extent
 * bounds rs/re, and cb_update_punch as completion callback.  An inactive
 * target fails the whole operation with -EIO, as does an empty set;
 * allocation failures yield -ENOMEM.
 */
1154 int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
1155 struct obd_trans_info *oti,
1156 struct lov_request_set **reqset)
1158 struct lov_request_set *set;
1159 struct lov_obd *lov = &exp->exp_obd->u.lov;
1162 OBD_ALLOC(set, sizeof(*set));
1167 set->set_oi = oinfo;
1170 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1171 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1172 struct lov_request *req;
1175 if (!lov_stripe_intersects(oinfo->oi_md, i,
1176 oinfo->oi_policy.l_extent.start,
1177 oinfo->oi_policy.l_extent.end,
1181 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
1182 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1183 GOTO(out_set, rc = -EIO);
1186 OBD_ALLOC(req, sizeof(*req));
1188 GOTO(out_set, rc = -ENOMEM);
1190 req->rq_idx = loi->loi_ost_idx;
1192 OBDO_ALLOC(req->rq_oi.oi_oa);
1193 if (req->rq_oi.oi_oa == NULL) {
1194 OBD_FREE(req, sizeof(*req));
1195 GOTO(out_set, rc = -ENOMEM);
1197 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1198 sizeof(*req->rq_oi.oi_oa));
1199 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1200 req->rq_oi.oi_oa->o_valid |= OBD_MD_FLGROUP;
1202 req->rq_oi.oi_oa->o_stripe_idx = i;
1203 req->rq_oi.oi_cb_up = cb_update_punch;
1205 req->rq_oi.oi_policy.l_extent.start = rs;
1206 req->rq_oi.oi_policy.l_extent.end = re;
1207 req->rq_oi.oi_policy.l_extent.gid = -1;
1209 req->rq_oi.oi_capa = oinfo->oi_capa;
1211 lov_set_add_req(req, set);
/* No request could be queued at all. */
1213 if (!set->set_count)
1214 GOTO(out_set, rc = -EIO);
1218 lov_fini_punch_set(set);
/*
 * Finish a sync request set.  When requests completed, the case that
 * none of them succeeded is singled out.  The caller's reference on the
 * set is then dropped with lov_put_reqset().
 * NOTE(review): the error reported when nothing succeeded is not visible
 * below - confirm.
 */
1222 int lov_fini_sync_set(struct lov_request_set *set)
1228 LASSERT(set->set_exp);
1229 if (atomic_read(&set->set_completes)) {
1230 if (!atomic_read(&set->set_success))
1232 /* FIXME update qos data here */
1235 lov_put_reqset(set);
1240 /* The callback for osc_sync that finalizes a request info when a
1241 * response is received.
 *
 * @cookie is the obd_info embedded in a lov_request as rq_oi; the owning
 * request is recovered with container_of() and the result @rc is passed
 * on to lov_update_common_set(). */
1242 static int cb_sync_update(void *cookie, int rc)
1244 struct obd_info *oinfo = cookie;
1245 struct lov_request *lovreq;
1247 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1248 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for syncing a byte range of a striped object.
 *
 * @exp:    LOV export
 * @oinfo:  request description; oi_md is the striped object, oi_oa the
 *          template obdo
 * @start:  first byte of the range
 * @end:    last byte of the range
 * @reqset: output pointer for the new set
 *
 * Each stripe is tested against [@start, @end] with
 * lov_stripe_intersects().  A request receives a copy of the template
 * obdo with the stripe object's id and index, the extent bounds rs/re,
 * and cb_sync_update as completion callback.  Allocation failures yield
 * -ENOMEM; an empty set yields -EIO.
 */
1251 int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo,
1252 obd_off start, obd_off end,
1253 struct lov_request_set **reqset)
1255 struct lov_request_set *set;
1256 struct lov_obd *lov = &exp->exp_obd->u.lov;
1265 set->set_oi = oinfo;
1267 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1268 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1269 struct lov_request *req;
1272 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
1273 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1277 if (!lov_stripe_intersects(oinfo->oi_md, i, start, end, &rs,
1283 GOTO(out_set, rc = -ENOMEM);
1285 req->rq_idx = loi->loi_ost_idx;
1287 OBDO_ALLOC(req->rq_oi.oi_oa);
1288 if (req->rq_oi.oi_oa == NULL) {
1289 OBD_FREE(req, sizeof(*req));
1290 GOTO(out_set, rc = -ENOMEM);
/* Structure copy of the template obdo, then retarget it at the stripe object. */
1292 *req->rq_oi.oi_oa = *oinfo->oi_oa;
1293 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1294 req->rq_oi.oi_oa->o_stripe_idx = i;
1296 req->rq_oi.oi_policy.l_extent.start = rs;
1297 req->rq_oi.oi_policy.l_extent.end = re;
1298 req->rq_oi.oi_policy.l_extent.gid = -1;
1299 req->rq_oi.oi_cb_up = cb_sync_update;
1301 lov_set_add_req(req, set);
/* No request could be queued at all. */
1303 if (!set->set_count)
1304 GOTO(out_set, rc = -EIO);
1308 lov_fini_sync_set(set);
/* All-ones 64-bit value, used below as the "overflowed" marker. */
1312 #define LOV_U64_MAX ((__u64)~0ULL)
/*
 * Overflow-aware accumulation: when tot + add wraps around, tot is
 * pinned at LOV_U64_MAX.  Both arguments are evaluated more than once.
 */
1313 #define LOV_SUM_MAX(tot, add) \
1315 if ((tot) + (add) < (tot)) \
1316 (tot) = LOV_U64_MAX; \
/*
 * Post-process an aggregated statfs result.
 *
 * @obd:     LOV device whose cached statfs is refreshed
 * @osfs:    aggregated statfs data, adjusted in place
 * @success: number of targets that answered successfully
 *
 * The object counters os_files and os_ffree are divided by the expected
 * stripe count unless they are pinned at LOV_U64_MAX.  The result is
 * then stored in obd->obd_osfs, with a fresh obd_osfs_age, under
 * obd_osfs_lock.
 * NOTE(review): how @success gates this work is not visible below;
 * also confirm expected_stripes cannot be zero before the divisions.
 */
1321 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
1324 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
1326 if (osfs->os_files != LOV_U64_MAX)
1327 lov_do_div64(osfs->os_files, expected_stripes);
1328 if (osfs->os_ffree != LOV_U64_MAX)
1329 lov_do_div64(osfs->os_ffree, expected_stripes);
/* Publish the result as the device's cached statfs. */
1331 spin_lock(&obd->obd_osfs_lock);
1332 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
1333 obd->obd_osfs_age = cfs_time_current_64();
1334 spin_unlock(&obd->obd_osfs_lock);
/*
 * Finish a statfs request set.  When at least one request completed,
 * the aggregated data in set_oi->oi_osfs is post-processed by
 * lov_fini_statfs(), which receives the number of successful requests.
 * The caller's reference on the set is then dropped with
 * lov_put_reqset().
 */
1341 int lov_fini_statfs_set(struct lov_request_set *set)
1348 if (atomic_read(&set->set_completes)) {
1349 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
1350 atomic_read(&set->set_success));
1352 lov_put_reqset(set);
/*
 * Fold the statfs data of one target (@lov_sfs) into the running
 * aggregate (@osfs).
 *
 * In one case @lov_sfs is simply copied into @osfs (presumably for the
 * first successful reply - confirm).  Otherwise, if the block sizes
 * differ, the block counters of the side with the smaller block size
 * are shifted right so that both sides use the larger one; note that
 * this may modify @lov_sfs in place.  The block counters are then
 * summed, and the object counters are summed with LOV_SUM_MAX, i.e.
 * pinned at LOV_U64_MAX on overflow.
 */
1356 void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
1359 int shift = 0, quit = 0;
1363 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
1365 if (osfs->os_bsize != lov_sfs->os_bsize) {
1366 /* assume all block sizes are always powers of 2 */
1367 /* get the bits difference */
1368 tmp = osfs->os_bsize | lov_sfs->os_bsize;
1369 for (shift = 0; shift <= 64; ++shift) {
/* The aggregate has the smaller block size: adopt the larger one and rescale it. */
1381 if (osfs->os_bsize < lov_sfs->os_bsize) {
1382 osfs->os_bsize = lov_sfs->os_bsize;
1384 osfs->os_bfree >>= shift;
1385 osfs->os_bavail >>= shift;
1386 osfs->os_blocks >>= shift;
1387 } else if (shift != 0) {
1388 lov_sfs->os_bfree >>= shift;
1389 lov_sfs->os_bavail >>= shift;
1390 lov_sfs->os_blocks >>= shift;
1392 osfs->os_bfree += lov_sfs->os_bfree;
1393 osfs->os_bavail += lov_sfs->os_bavail;
1394 osfs->os_blocks += lov_sfs->os_blocks;
1395 /* XXX not sure about this one - depends on policy.
1396 * - could be minimum if we always stripe on all OBDs
1397 * (but that would be wrong for any other policy,
1398 * if one of the OBDs has no more objects left)
1399 * - could be sum if we stripe whole objects
1400 * - could be average, just to give a nice number
1402 * To give a "reasonable" (if not wholly accurate)
1403 * number, we divide the total number of free objects
1404 * by expected stripe count (watch out for overflow).
1406 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
1407 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
1411 /* The callback for osc_statfs_async that finalizes a request info when a
1412 * response is received.
 *
 * @cookie is the obd_info embedded in a lov_request as rq_oi.  The
 * request is accounted in its set; for a target that exists and is
 * active the reply is also stored as that target device's cached statfs
 * (the cache age is only refreshed when the data did not itself come
 * from a cache).  The reply is then folded into the set-wide result
 * with lov_update_statfs().  For OBD_STATFS_PTLRPCD requests the set is
 * interpreted here once lov_set_finished() reports completion. */
1413 static int cb_statfs_update(void *cookie, int rc)
1415 struct obd_info *oinfo = cookie;
1416 struct lov_request *lovreq;
1417 struct lov_request_set *set;
1418 struct obd_statfs *osfs, *lov_sfs;
1419 struct lov_obd *lov;
1420 struct lov_tgt_desc *tgt;
1421 struct obd_device *lovobd, *tgtobd;
1424 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1425 set = lovreq->rq_rqset;
1426 lovobd = set->set_obd;
1427 lov = &lovobd->u.lov;
1428 osfs = set->set_oi->oi_osfs;
1429 lov_sfs = oinfo->oi_osfs;
/* Snapshot taken before this request is counted by lov_update_set(). */
1430 success = atomic_read(&set->set_success);
1431 /* XXX: the same is done in lov_update_common_set, however
1432 lovset->set_exp is not initialized. */
1433 lov_update_set(set, lovreq, rc);
1438 tgt = lov->lov_tgts[lovreq->rq_idx];
1439 if (!tgt || !tgt->ltd_active)
1440 GOTO(out_update, rc);
/* Refresh the target device's own cached statfs from this reply. */
1442 tgtobd = class_exp2obd(tgt->ltd_exp);
1443 spin_lock(&tgtobd->obd_osfs_lock);
1444 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
1445 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
1446 tgtobd->obd_osfs_age = cfs_time_current_64();
1447 spin_unlock(&tgtobd->obd_osfs_lock);
1450 lov_update_statfs(osfs, lov_sfs, success);
/* The last argument is non-zero when not every request succeeded. */
1454 if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
1455 lov_set_finished(set, 0)) {
1456 lov_statfs_interpret(NULL, set, set->set_count !=
1457 atomic_read(&set->set_success));
/*
 * Build the request set for a statfs over the targets of a LOV device.
 *
 * @obd:    LOV device
 * @oinfo:  request description; oi_flags is copied into every request
 * @reqset: output pointer for the new set
 *
 * The loop covers the first ld_tgt_count entries of lov_tgts.  An entry
 * is reported as inactive when it is NULL, or when it is not active and
 * the caller passed OBD_STATFS_NODELAY; an entry without an export is
 * reported as administratively disabled.  A request gets its own
 * obd_statfs buffer and cb_statfs_update as completion callback.
 * Allocation failures yield -ENOMEM; an empty set yields -EIO.
 */
1463 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
1464 struct lov_request_set **reqset)
1466 struct lov_request_set *set;
1467 struct lov_obd *lov = &obd->u.lov;
1470 OBD_ALLOC(set, sizeof(*set));
1476 set->set_oi = oinfo;
1478 /* We only get block data from the OBD */
1479 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1480 struct lov_request *req;
1482 if (lov->lov_tgts[i] == NULL ||
1483 (!lov_check_and_wait_active(lov, i) &&
1484 (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
1485 CDEBUG(D_HA, "lov idx %d inactive\n", i);
1489 /* skip targets that have been explicitely disabled by the
1491 if (!lov->lov_tgts[i]->ltd_exp) {
1492 CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
1496 OBD_ALLOC(req, sizeof(*req));
1498 GOTO(out_set, rc = -ENOMEM);
1500 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
1501 if (req->rq_oi.oi_osfs == NULL) {
1502 OBD_FREE(req, sizeof(*req));
1503 GOTO(out_set, rc = -ENOMEM);
1507 req->rq_oi.oi_cb_up = cb_statfs_update;
1508 req->rq_oi.oi_flags = oinfo->oi_flags;
1510 lov_set_add_req(req, set);
1512 if (!set->set_count)
1513 GOTO(out_set, rc = -EIO);
1517 lov_fini_statfs_set(set);