Merge branch 'xfs-efi-rework' into for-next
[firefly-linux-kernel-4.4.55.git] / fs / xfs / xfs_log_recover.c
index 01dd228ca05e315b88feb7afbfaf3e81d728878b..2e40f5e3cdf204ec7c3e0a6874eb66ec19548061 100644 (file)
@@ -1886,9 +1886,14 @@ xlog_recover_get_buf_lsn(
                uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
                break;
        case XFS_ATTR3_RMT_MAGIC:
-               lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
-               uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
-               break;
+               /*
+                * Remote attr blocks are written synchronously, rather than
+                * being logged. That means they do not contain a valid LSN
+                * (i.e. transactionally ordered) in them, and hence any time we
+                * see a buffer to replay over the top of a remote attribute
+                * block we should simply do so.
+                */
+               goto recover_immediately;
        case XFS_SB_MAGIC:
                lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
                uuid = &((struct xfs_dsb *)blk)->sb_uuid;
@@ -2928,16 +2933,16 @@ xlog_recover_efi_pass2(
        struct xlog_recover_item        *item,
        xfs_lsn_t                       lsn)
 {
-       int                     error;
-       xfs_mount_t             *mp = log->l_mp;
-       xfs_efi_log_item_t      *efip;
-       xfs_efi_log_format_t    *efi_formatp;
+       int                             error;
+       struct xfs_mount                *mp = log->l_mp;
+       struct xfs_efi_log_item         *efip;
+       struct xfs_efi_log_format       *efi_formatp;
 
        efi_formatp = item->ri_buf[0].i_addr;
 
        efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
-       if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
-                                        &(efip->efi_format)))) {
+       error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
+       if (error) {
                xfs_efi_item_free(efip);
                return error;
        }
@@ -2945,20 +2950,23 @@ xlog_recover_efi_pass2(
 
        spin_lock(&log->l_ailp->xa_lock);
        /*
-        * xfs_trans_ail_update() drops the AIL lock.
+        * The EFI has two references. One for the EFD and one for EFI to ensure
+        * it makes it into the AIL. Insert the EFI into the AIL directly and
+        * drop the EFI reference. Note that xfs_trans_ail_update() drops the
+        * AIL lock.
         */
        xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
+       xfs_efi_release(efip);
        return 0;
 }
 
 
 /*
- * This routine is called when an efd format structure is found in
- * a committed transaction in the log.  It's purpose is to cancel
- * the corresponding efi if it was still in the log.  To do this
- * it searches the AIL for the efi with an id equal to that in the
- * efd format structure.  If we find it, we remove the efi from the
- * AIL and free it.
+ * This routine is called when an EFD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding EFI if it
+ * was still in the log. To do this it searches the AIL for the EFI with an id
+ * equal to that in the EFD format structure. If we find it we drop the EFD
+ * reference, which removes the EFI from the AIL and frees it.
  */
 STATIC int
 xlog_recover_efd_pass2(
@@ -2980,8 +2988,8 @@ xlog_recover_efd_pass2(
        efi_id = efd_formatp->efd_efi_id;
 
        /*
-        * Search for the efi with the id in the efd format structure
-        * in the AIL.
+        * Search for the EFI with the id in the EFD format structure in the
+        * AIL.
         */
        spin_lock(&ailp->xa_lock);
        lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
@@ -2990,18 +2998,18 @@ xlog_recover_efd_pass2(
                        efip = (xfs_efi_log_item_t *)lip;
                        if (efip->efi_format.efi_id == efi_id) {
                                /*
-                                * xfs_trans_ail_delete() drops the
-                                * AIL lock.
+                                * Drop the EFD reference to the EFI. This
+                                * removes the EFI from the AIL and frees it.
                                 */
-                               xfs_trans_ail_delete(ailp, lip,
-                                                    SHUTDOWN_CORRUPT_INCORE);
-                               xfs_efi_item_free(efip);
+                               spin_unlock(&ailp->xa_lock);
+                               xfs_efi_release(efip);
                                spin_lock(&ailp->xa_lock);
                                break;
                        }
                }
                lip = xfs_trans_ail_cursor_next(ailp, &cur);
        }
+
        xfs_trans_ail_cursor_done(&cur);
        spin_unlock(&ailp->xa_lock);
 
@@ -3029,6 +3037,11 @@ xlog_recover_do_icreate_pass2(
        unsigned int            count;
        unsigned int            isize;
        xfs_agblock_t           length;
+       int                     blks_per_cluster;
+       int                     bb_per_cluster;
+       int                     cancel_count;
+       int                     nbufs;
+       int                     i;
 
        icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
        if (icl->icl_type != XFS_LI_ICREATE) {
@@ -3087,22 +3100,45 @@ xlog_recover_do_icreate_pass2(
        }
 
        /*
-        * Inode buffers can be freed. Do not replay the inode initialisation as
-        * we could be overwriting something written after this inode buffer was
-        * cancelled.
+        * The icreate transaction can cover multiple cluster buffers and these
+        * buffers could have been freed and reused. Check the individual
+        * buffers for cancellation so we don't overwrite anything written after
+        * a cancellation.
+        */
+       blks_per_cluster = xfs_icluster_size_fsb(mp);
+       bb_per_cluster = XFS_FSB_TO_BB(mp, blks_per_cluster);
+       nbufs = length / blks_per_cluster;
+       for (i = 0, cancel_count = 0; i < nbufs; i++) {
+               xfs_daddr_t     daddr;
+
+               daddr = XFS_AGB_TO_DADDR(mp, agno,
+                                        agbno + i * blks_per_cluster);
+               if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
+                       cancel_count++;
+       }
+
+       /*
+        * We currently only use icreate for a single allocation at a time. This
+        * means we should expect either all or none of the buffers to be
+        * cancelled. Be conservative and skip replay if at least one buffer is
+        * cancelled, but warn the user that something is awry if the buffers
+        * are not consistent.
         *
-        * XXX: we need to iterate all buffers and only init those that are not
-        * cancelled. I think that a more fine grained factoring of
-        * xfs_ialloc_inode_init may be appropriate here to enable this to be
-        * done easily.
+        * XXX: This must be refined to only skip cancelled clusters once we use
+        * icreate for multiple chunk allocations.
         */
-       if (xlog_check_buffer_cancelled(log,
-                       XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
+       ASSERT(!cancel_count || cancel_count == nbufs);
+       if (cancel_count) {
+               if (cancel_count != nbufs)
+                       xfs_warn(mp,
+       "WARNING: partial inode chunk cancellation, skipped icreate.");
+               trace_xfs_log_recover_icreate_cancel(log, icl);
                return 0;
+       }
 
-       xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,
-                             be32_to_cpu(icl->icl_gen));
-       return 0;
+       trace_xfs_log_recover_icreate_recover(log, icl);
+       return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
+                                    length, be32_to_cpu(icl->icl_gen));
 }
 
 STATIC void
@@ -3380,14 +3416,24 @@ xlog_recover_add_to_cont_trans(
        char                    *ptr, *old_ptr;
        int                     old_len;
 
+       /*
+        * If the transaction is empty, the header was split across this and the
+        * previous record. Copy the rest of the header.
+        */
        if (list_empty(&trans->r_itemq)) {
-               /* finish copying rest of trans header */
+               ASSERT(len < sizeof(struct xfs_trans_header));
+               if (len > sizeof(struct xfs_trans_header)) {
+                       xfs_warn(log->l_mp, "%s: bad header length", __func__);
+                       return -EIO;
+               }
+
                xlog_recover_add_item(&trans->r_itemq);
                ptr = (char *)&trans->r_theader +
-                               sizeof(xfs_trans_header_t) - len;
+                               sizeof(struct xfs_trans_header) - len;
                memcpy(ptr, dp, len);
                return 0;
        }
+
        /* take the tail entry */
        item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
 
@@ -3436,7 +3482,19 @@ xlog_recover_add_to_trans(
                        ASSERT(0);
                        return -EIO;
                }
-               if (len == sizeof(xfs_trans_header_t))
+
+               if (len > sizeof(struct xfs_trans_header)) {
+                       xfs_warn(log->l_mp, "%s: bad header length", __func__);
+                       ASSERT(0);
+                       return -EIO;
+               }
+
+               /*
+                * The transaction header can be arbitrarily split across op
+                * records. If we don't have the whole thing here, copy what we
+                * do have and handle the rest in the next record.
+                */
+               if (len == sizeof(struct xfs_trans_header))
                        xlog_recover_add_item(&trans->r_itemq);
                memcpy(&trans->r_theader, dp, len);
                return 0;
@@ -3739,7 +3797,7 @@ xlog_recover_process_efi(
                         * free the memory associated with it.
                         */
                        set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
-                       xfs_efi_release(efip, efip->efi_format.efi_nextents);
+                       xfs_efi_release(efip);
                        return -EIO;
                }
        }
@@ -3752,11 +3810,11 @@ xlog_recover_process_efi(
 
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &(efip->efi_format.efi_extents[i]);
-               error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
+               error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
+                                             extp->ext_len);
                if (error)
                        goto abort_error;
-               xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
-                                        extp->ext_len);
+
        }
 
        set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
@@ -3788,10 +3846,10 @@ abort_error:
  */
 STATIC int
 xlog_recover_process_efis(
-       struct xlog     *log)
+       struct xlog             *log)
 {
-       xfs_log_item_t          *lip;
-       xfs_efi_log_item_t      *efip;
+       struct xfs_log_item     *lip;
+       struct xfs_efi_log_item *efip;
        int                     error = 0;
        struct xfs_ail_cursor   cur;
        struct xfs_ail          *ailp;
@@ -3815,7 +3873,7 @@ xlog_recover_process_efis(
                /*
                 * Skip EFIs that we've already processed.
                 */
-               efip = (xfs_efi_log_item_t *)lip;
+               efip = container_of(lip, struct xfs_efi_log_item, efi_item);
                if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
                        lip = xfs_trans_ail_cursor_next(ailp, &cur);
                        continue;
@@ -3834,6 +3892,50 @@ out:
        return error;
 }
 
+/*
+ * Called when the mount has failed and recovery is being abandoned. Walk the
+ * AIL from its start and release every leading EFI so that none of them is
+ * left behind pinning the AIL. Always returns 0.
+ */
+STATIC int
+xlog_recover_cancel_efis(
+       struct xlog             *log)
+{
+       struct xfs_ail          *ailp = log->l_ailp;
+       struct xfs_ail_cursor   cur;
+       struct xfs_log_item     *lip;
+
+       spin_lock(&ailp->xa_lock);
+       for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+            lip != NULL;
+            lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
+               struct xfs_efi_log_item *efip;
+
+               /*
+                * The first non-EFI item ends the walk. On DEBUG builds,
+                * assert that no EFI shows up in the remainder of the AIL.
+                */
+               if (lip->li_type != XFS_LI_EFI) {
+#ifdef DEBUG
+                       for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
+                               ASSERT(lip->li_type != XFS_LI_EFI);
+#endif
+                       break;
+               }
+
+               efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+
+               /* The AIL lock is not held across the release call. */
+               spin_unlock(&ailp->xa_lock);
+               xfs_efi_release(efip);
+               spin_lock(&ailp->xa_lock);
+       }
+
+       xfs_trans_ail_cursor_done(&cur);
+       spin_unlock(&ailp->xa_lock);
+       return 0;
+}
+
 /*
  * This routine performs a transaction to null out a bad inode pointer
  * in an agi unlinked inode hash bucket.
@@ -4527,11 +4629,13 @@ xlog_recover(
                    xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
                                        XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
                        xfs_warn(log->l_mp,
-"Superblock has unknown incompatible log features (0x%x) enabled.\n"
-"The log can not be fully and/or safely recovered by this kernel.\n"
-"Please recover the log on a kernel that supports the unknown features.",
+"Superblock has unknown incompatible log features (0x%x) enabled.",
                                (log->l_mp->m_sb.sb_features_log_incompat &
                                        XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
+                       xfs_warn(log->l_mp,
+"The log can not be fully and/or safely recovered by this kernel.");
+                       xfs_warn(log->l_mp,
+"Please recover the log on a kernel that supports the unknown features.");
                        return -EINVAL;
                }
 
@@ -4607,6 +4711,17 @@ xlog_recover_finish(
        return 0;
 }
 
+/* Abandon log recovery after a failed mount by releasing pending EFIs. */
+int
+xlog_recover_cancel(
+       struct xlog     *log)
+{
+       /* Only cancel when the log is flagged as needing recovery. */
+       if (!(log->l_flags & XLOG_RECOVERY_NEEDED))
+               return 0;
+
+       return xlog_recover_cancel_efis(log);
+}
 
 #if defined(DEBUG)
 /*