ceph: reuse request message when replaying against recovering mds
authorSage Weil <sage@newdream.net>
Thu, 15 Jul 2010 20:24:32 +0000 (13:24 -0700)
committerSage Weil <sage@newdream.net>
Fri, 16 Jul 2010 17:30:17 +0000 (10:30 -0700)
Replayed rename operations (after an mds failure/recovery) were broken
because the request paths were regenerated from the dentry names, which
get mangled when d_move() is called.

Instead, resend the previous request message when replaying completed
operations.  Just make sure the REPLAY flag is set and the target ino is
filled in.

This fixes problems with workloads doing renames when the MDS restarts,
where the rename operation appears to succeed, but on mds restart then
fails (leading to client confusion, app breakage, etc.).

Signed-off-by: Sage Weil <sage@newdream.net>
fs/ceph/mds_client.c

index 3ab79f6c4ce8808fa7c72a66adc23dcf0fcfc97d..23332bc4451521a9e0a623f7c5ce4f456ea121ff 100644 (file)
@@ -1580,6 +1580,27 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
        dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
             req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
 
+       if (req->r_got_unsafe) {
+               /*
+                * Replay.  Do not regenerate message (and rebuild
+                * paths, etc.); just use the original message.
+                * Rebuilding paths will break for renames because
+                * d_move mangles the src name.
+                */
+               msg = req->r_request;
+               rhead = msg->front.iov_base;
+
+               flags = le32_to_cpu(rhead->flags);
+               flags |= CEPH_MDS_FLAG_REPLAY;
+               rhead->flags = cpu_to_le32(flags);
+
+               if (req->r_target_inode)
+                       rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+               rhead->num_retry = req->r_attempts - 1;
+               return 0;
+       }
+
        if (req->r_request) {
                ceph_msg_put(req->r_request);
                req->r_request = NULL;
@@ -1601,13 +1622,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
        rhead->flags = cpu_to_le32(flags);
        rhead->num_fwd = req->r_num_fwd;
        rhead->num_retry = req->r_attempts - 1;
+       rhead->ino = 0;
 
        dout(" r_locked_dir = %p\n", req->r_locked_dir);
-
-       if (req->r_target_inode && req->r_got_unsafe)
-               rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
-       else
-               rhead->ino = 0;
        return 0;
 }