cifs: fix use-after-free bug in find_writable_file
[firefly-linux-kernel-4.4.55.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
/*
 * Open @full_path via the SMB POSIX open protocol extension.
 *
 * On success *poplock and *pnetfid are filled from the server reply.
 * If @pinode is non-NULL the inode is created (when *pinode == NULL) or
 * refreshed from the returned FILE_UNIX_BASIC_INFO.  Returns 0 on
 * success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the caller's umask to the requested create mode */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
					CIFS_MOUNT_MAP_SPECIAL_CHR);
	/* tlink reference only needed for the create call itself */
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type of -1 means the server returned no usable metadata */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* existing inode: just refresh its attributes */
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174
/*
 * Open @full_path with the regular (non-POSIX) per-dialect open callout
 * and then refresh the cached inode metadata from the server.
 *
 * @oplock and @fid are filled in by server->ops->open().  Returns 0 on
 * success, -ENOSYS if the dialect has no open op, or a negative errno.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* scratch buffer the open callout fills with file attributes */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	rc = server->ops->open(xid, tcon, full_path, disposition,
			       desired_access, create_options, fid, oplock, buf,
			       cifs_sb);

	if (rc)
		goto out;

	/* refresh cached inode info from the data returned by the open */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, &fid->netfid);

out:
	kfree(buf);
	return rc;
}
245
246 static bool
247 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
248 {
249         struct cifs_fid_locks *cur;
250         bool has_locks = false;
251
252         down_read(&cinode->lock_sem);
253         list_for_each_entry(cur, &cinode->llist, llist) {
254                 if (!list_empty(&cur->locks)) {
255                         has_locks = true;
256                         break;
257                 }
258         }
259         up_read(&cinode->lock_sem);
260         return has_locks;
261 }
262
/*
 * Allocate and initialise the cifsFileInfo for a freshly opened handle,
 * link it onto the tcon and inode open-file lists and stash it in
 * file->private_data.
 *
 * Takes an extra dentry reference and tlink reference; the initial
 * refcount of 1 is dropped by cifsFileInfo_put().  Returns NULL on
 * allocation failure (caller must close the server handle itself).
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* register this handle's (empty) lock list on the inode */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	/* pin the superblock while this handle is open */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (oplock == server->vals->oplock_read &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	/*
	 * NOTE(review): pending_open appears to carry an oplock update that
	 * was delivered while this open was still in flight — confirm.
	 */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	file->private_data = cfile;
	return cfile;
}
332
/*
 * Take an additional reference on the file private data and return it.
 * Locked wrapper around cifsFileInfo_get_locked(); must be called
 * without holding cifs_file_list_lock.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}
341
342 /*
343  * Release a reference on the file private data. This may involve closing
344  * the filehandle out on the server. Must be called without holding
345  * cifs_file_list_lock.
346  */
347 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
348 {
349         struct inode *inode = cifs_file->dentry->d_inode;
350         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
351         struct TCP_Server_Info *server = tcon->ses->server;
352         struct cifsInodeInfo *cifsi = CIFS_I(inode);
353         struct super_block *sb = inode->i_sb;
354         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
355         struct cifsLockInfo *li, *tmp;
356         struct cifs_fid fid;
357         struct cifs_pending_open open;
358
359         spin_lock(&cifs_file_list_lock);
360         if (--cifs_file->count > 0) {
361                 spin_unlock(&cifs_file_list_lock);
362                 return;
363         }
364
365         if (server->ops->get_lease_key)
366                 server->ops->get_lease_key(inode, &fid);
367
368         /* store open in pending opens to make sure we don't miss lease break */
369         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
370
371         /* remove it from the lists */
372         list_del(&cifs_file->flist);
373         list_del(&cifs_file->tlist);
374
375         if (list_empty(&cifsi->openFileList)) {
376                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
377                          cifs_file->dentry->d_inode);
378                 /*
379                  * In strict cache mode we need invalidate mapping on the last
380                  * close  because it may cause a error when we open this file
381                  * again and get at least level II oplock.
382                  */
383                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
384                         CIFS_I(inode)->invalid_mapping = true;
385                 cifs_set_oplock_level(cifsi, 0);
386         }
387         spin_unlock(&cifs_file_list_lock);
388
389         cancel_work_sync(&cifs_file->oplock_break);
390
391         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
392                 struct TCP_Server_Info *server = tcon->ses->server;
393                 unsigned int xid;
394
395                 xid = get_xid();
396                 if (server->ops->close)
397                         server->ops->close(xid, tcon, &cifs_file->fid);
398                 _free_xid(xid);
399         }
400
401         cifs_del_pending_open(&open);
402
403         /*
404          * Delete any outstanding lock records. We'll lose them when the file
405          * is closed anyway.
406          */
407         down_write(&cifsi->lock_sem);
408         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
409                 list_del(&li->llist);
410                 cifs_del_lock_waiters(li);
411                 kfree(li);
412         }
413         list_del(&cifs_file->llist->llist);
414         kfree(cifs_file->llist);
415         up_write(&cifsi->lock_sem);
416
417         cifs_put_tlink(cifs_file->tlink);
418         dput(cifs_file->dentry);
419         cifs_sb_deactive(sb);
420         kfree(cifs_file);
421 }
422
/*
 * VFS ->open for regular files.  Tries the SMB POSIX open extension when
 * the server supports it, otherwise falls back to a regular NT-style
 * open, then builds the cifsFileInfo for the new handle.
 * Returns 0 on success or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	/* prefer the POSIX open path when the server advertises it */
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server mishandles posix open; stop trying it */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* track this open so a concurrent lease break is not missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open; nothing now references the handle */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
541
542 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
543
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 *
 * Holds lock_sem for reading across the push so the cached lock lists
 * cannot change while they are re-sent to the server.  Returns 0 on
 * success or the error from the push helper.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	/* pick POSIX or mandatory push based on unix caps and mount flags */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
573
/*
 * Re-open a file handle that was marked invalid (e.g. after a session
 * reconnect).  Takes cfile->fh_mutex for the duration of the reopen;
 * when @can_flush is set, dirty pages are written back and the inode
 * info refreshed from the server afterwards.
 * Returns 0 on success or a negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_fid fid;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* another thread already reopened the handle */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = cfile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * CIFSSMBOpen and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, tcon, full_path, disposition,
			       desired_access, create_options, &fid, &oplock,
			       NULL, cifs_sb);
	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/* publish the new fid/oplock and re-send cached byte-range locks */
	server->ops->set_fid(cfile, &fid, oplock);
	cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
706
707 int cifs_close(struct inode *inode, struct file *file)
708 {
709         if (file->private_data != NULL) {
710                 cifsFileInfo_put(file->private_data);
711                 file->private_data = NULL;
712         }
713
714         /* return code from the ->release op is always ignored */
715         return 0;
716 }
717
/*
 * VFS ->release for directories: close any uncompleted readdir on the
 * server, free the cached search buffer and drop the private data.
 * Always returns 0 (errors from the server close are ignored).
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	if (server->ops->dir_needs_close(cfile)) {
		/* mark invalid before dropping the lock, then close on server */
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	/* release the network buffer caching the last search response */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
768
769 static struct cifsLockInfo *
770 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
771 {
772         struct cifsLockInfo *lock =
773                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
774         if (!lock)
775                 return lock;
776         lock->offset = offset;
777         lock->length = length;
778         lock->type = type;
779         lock->pid = current->tgid;
780         INIT_LIST_HEAD(&lock->blist);
781         init_waitqueue_head(&lock->block_q);
782         return lock;
783 }
784
785 void
786 cifs_del_lock_waiters(struct cifsLockInfo *lock)
787 {
788         struct cifsLockInfo *li, *tmp;
789         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
790                 list_del_init(&li->blist);
791                 wake_up(&li->block_q);
792         }
793 }
794
795 #define CIFS_LOCK_OP    0
796 #define CIFS_READ_OP    1
797 #define CIFS_WRITE_OP   2
798
/* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's lock list for a lock conflicting with the range
 * [@offset, @offset + @length) of the given @type.  On conflict, store
 * the offending lock in *conf_lock (if non-NULL) and return true.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* skip locks whose range does not overlap the request */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		/* our own lock through the same fid is usually no conflict */
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/* shared request is compatible with our own or shared locks */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
830
831 bool
832 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
833                         __u8 type, struct cifsLockInfo **conf_lock,
834                         int rw_check)
835 {
836         bool rc = false;
837         struct cifs_fid_locks *cur;
838         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
839
840         list_for_each_entry(cur, &cinode->llist, llist) {
841                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
842                                                  cfile, conf_lock, rw_check);
843                 if (rc)
844                         break;
845         }
846
847         return rc;
848 }
849
850 /*
851  * Check if there is another lock that prevents us to set the lock (mandatory
852  * style). If such a lock exists, update the flock structure with its
853  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
854  * or leave it the same if we can't. Returns 0 if we don't need to request to
855  * the server or 1 otherwise.
856  */
857 static int
858 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
859                __u8 type, struct file_lock *flock)
860 {
861         int rc = 0;
862         struct cifsLockInfo *conf_lock;
863         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
864         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
865         bool exist;
866
867         down_read(&cinode->lock_sem);
868
869         exist = cifs_find_lock_conflict(cfile, offset, length, type,
870                                         &conf_lock, CIFS_LOCK_OP);
871         if (exist) {
872                 flock->fl_start = conf_lock->offset;
873                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
874                 flock->fl_pid = conf_lock->pid;
875                 if (conf_lock->type & server->vals->shared_lock_type)
876                         flock->fl_type = F_RDLCK;
877                 else
878                         flock->fl_type = F_WRLCK;
879         } else if (!cinode->can_cache_brlcks)
880                 rc = 1;
881         else
882                 flock->fl_type = F_UNLCK;
883
884         up_read(&cinode->lock_sem);
885         return rc;
886 }
887
/* Append an already-initialized lock to the file's cached lock list. */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	/* write access - we modify the per-fid lock list */
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
896
897 /*
898  * Set the byte-range lock (mandatory style). Returns:
899  * 1) 0, if we set the lock and don't need to request to the server;
900  * 2) 1, if no locks prevent us but we need to request to the server;
901  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
902  */
903 static int
904 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
905                  bool wait)
906 {
907         struct cifsLockInfo *conf_lock;
908         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
909         bool exist;
910         int rc = 0;
911
912 try_again:
913         exist = false;
914         down_write(&cinode->lock_sem);
915
916         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
917                                         lock->type, &conf_lock, CIFS_LOCK_OP);
918         if (!exist && cinode->can_cache_brlcks) {
919                 list_add_tail(&lock->llist, &cfile->llist->locks);
920                 up_write(&cinode->lock_sem);
921                 return rc;
922         }
923
924         if (!exist)
925                 rc = 1;
926         else if (!wait)
927                 rc = -EACCES;
928         else {
929                 list_add_tail(&lock->blist, &conf_lock->blist);
930                 up_write(&cinode->lock_sem);
931                 rc = wait_event_interruptible(lock->block_q,
932                                         (lock->blist.prev == &lock->blist) &&
933                                         (lock->blist.next == &lock->blist));
934                 if (!rc)
935                         goto try_again;
936                 down_write(&cinode->lock_sem);
937                 list_del_init(&lock->blist);
938         }
939
940         up_write(&cinode->lock_sem);
941         return rc;
942 }
943
944 /*
945  * Check if there is another lock that prevents us to set the lock (posix
946  * style). If such a lock exists, update the flock structure with its
947  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
948  * or leave it the same if we can't. Returns 0 if we don't need to request to
949  * the server or 1 otherwise.
950  */
951 static int
952 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
953 {
954         int rc = 0;
955         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
956         unsigned char saved_type = flock->fl_type;
957
958         if ((flock->fl_flags & FL_POSIX) == 0)
959                 return 1;
960
961         down_read(&cinode->lock_sem);
962         posix_test_lock(file, flock);
963
964         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
965                 flock->fl_type = saved_type;
966                 rc = 1;
967         }
968
969         up_read(&cinode->lock_sem);
970         return rc;
971 }
972
973 /*
974  * Set the byte-range lock (posix style). Returns:
975  * 1) 0, if we set the lock and don't need to request to the server;
976  * 2) 1, if we need to request to the server;
977  * 3) <0, if the error occurs while setting the lock.
978  */
979 static int
980 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
981 {
982         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
983         int rc = 1;
984
985         if ((flock->fl_flags & FL_POSIX) == 0)
986                 return rc;
987
988 try_again:
989         down_write(&cinode->lock_sem);
990         if (!cinode->can_cache_brlcks) {
991                 up_write(&cinode->lock_sem);
992                 return rc;
993         }
994
995         rc = posix_lock_file(file, flock, NULL);
996         up_write(&cinode->lock_sem);
997         if (rc == FILE_LOCK_DEFERRED) {
998                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
999                 if (!rc)
1000                         goto try_again;
1001                 locks_delete_block(flock);
1002         }
1003         return rc;
1004 }
1005
1006 int
1007 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1008 {
1009         unsigned int xid;
1010         int rc = 0, stored_rc;
1011         struct cifsLockInfo *li, *tmp;
1012         struct cifs_tcon *tcon;
1013         unsigned int num, max_num, max_buf;
1014         LOCKING_ANDX_RANGE *buf, *cur;
1015         int types[] = {LOCKING_ANDX_LARGE_FILES,
1016                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1017         int i;
1018
1019         xid = get_xid();
1020         tcon = tlink_tcon(cfile->tlink);
1021
1022         /*
1023          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1024          * and check it for zero before using.
1025          */
1026         max_buf = tcon->ses->server->maxBuf;
1027         if (!max_buf) {
1028                 free_xid(xid);
1029                 return -EINVAL;
1030         }
1031
1032         max_num = (max_buf - sizeof(struct smb_hdr)) /
1033                                                 sizeof(LOCKING_ANDX_RANGE);
1034         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1035         if (!buf) {
1036                 free_xid(xid);
1037                 return -ENOMEM;
1038         }
1039
1040         for (i = 0; i < 2; i++) {
1041                 cur = buf;
1042                 num = 0;
1043                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1044                         if (li->type != types[i])
1045                                 continue;
1046                         cur->Pid = cpu_to_le16(li->pid);
1047                         cur->LengthLow = cpu_to_le32((u32)li->length);
1048                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1049                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1050                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1051                         if (++num == max_num) {
1052                                 stored_rc = cifs_lockv(xid, tcon,
1053                                                        cfile->fid.netfid,
1054                                                        (__u8)li->type, 0, num,
1055                                                        buf);
1056                                 if (stored_rc)
1057                                         rc = stored_rc;
1058                                 cur = buf;
1059                                 num = 0;
1060                         } else
1061                                 cur++;
1062                 }
1063
1064                 if (num) {
1065                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1066                                                (__u8)types[i], 0, num, buf);
1067                         if (stored_rc)
1068                                 rc = stored_rc;
1069                 }
1070         }
1071
1072         kfree(buf);
1073         free_xid(xid);
1074         return rc;
1075 }
1076
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
	for (lockp = &inode->i_flock; *lockp != NULL; \
	     lockp = &(*lockp)->fl_next)

/*
 * Snapshot of one posix lock, taken while holding lock_flocks() so the
 * lock can be sent to the server after that spinlock is dropped.
 */
struct lock_to_push {
	struct list_head llist;	/* entry in the local locks_to_send list */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* owner pid, from flock->fl_pid */
	__u16 netfid;		/* file handle the lock applies to */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1090
/*
 * Push all cached posix byte-range locks of @cfile to the server.
 *
 * Done in three stages because CIFSSMBPosixLock can sleep but the
 * inode's flock list may only be walked under lock_flocks():
 * 1) count the posix locks, 2) preallocate that many lock_to_push
 * entries and copy the lock data into them under lock_flocks(),
 * 3) send each entry to the server outside the spinlock.
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock, **before;
	unsigned int count = 0, i = 0;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	/* stage 1: count the posix locks on this inode */
	lock_flocks();
	cifs_for_each_lock(cfile->dentry->d_inode, before) {
		if ((*before)->fl_flags & FL_POSIX)
			count++;
	}
	unlock_flocks();

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	/* stage 2: fill the preallocated entries under the spinlock */
	el = locks_to_send.next;
	lock_flocks();
	cifs_for_each_lock(cfile->dentry->d_inode, before) {
		flock = *before;
		if ((flock->fl_flags & FL_POSIX) == 0)
			continue;
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = flock->fl_pid;
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	unlock_flocks();

	/* stage 3: send each snapshot to the server, freeing as we go */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	/* allocation failed part way - release what we already allocated */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
1178
1179 static int
1180 cifs_push_locks(struct cifsFileInfo *cfile)
1181 {
1182         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1183         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1184         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1185         int rc = 0;
1186
1187         /* we are going to update can_cache_brlcks here - need a write access */
1188         down_write(&cinode->lock_sem);
1189         if (!cinode->can_cache_brlcks) {
1190                 up_write(&cinode->lock_sem);
1191                 return rc;
1192         }
1193
1194         if (cap_unix(tcon->ses) &&
1195             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1196             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1197                 rc = cifs_push_posix_locks(cfile);
1198         else
1199                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1200
1201         cinode->can_cache_brlcks = false;
1202         up_write(&cinode->lock_sem);
1203         return rc;
1204 }
1205
1206 static void
1207 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1208                 bool *wait_flag, struct TCP_Server_Info *server)
1209 {
1210         if (flock->fl_flags & FL_POSIX)
1211                 cifs_dbg(FYI, "Posix\n");
1212         if (flock->fl_flags & FL_FLOCK)
1213                 cifs_dbg(FYI, "Flock\n");
1214         if (flock->fl_flags & FL_SLEEP) {
1215                 cifs_dbg(FYI, "Blocking lock\n");
1216                 *wait_flag = true;
1217         }
1218         if (flock->fl_flags & FL_ACCESS)
1219                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1220         if (flock->fl_flags & FL_LEASE)
1221                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1222         if (flock->fl_flags &
1223             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1224                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1225                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1226
1227         *type = server->vals->large_lock_type;
1228         if (flock->fl_type == F_WRLCK) {
1229                 cifs_dbg(FYI, "F_WRLCK\n");
1230                 *type |= server->vals->exclusive_lock_type;
1231                 *lock = 1;
1232         } else if (flock->fl_type == F_UNLCK) {
1233                 cifs_dbg(FYI, "F_UNLCK\n");
1234                 *type |= server->vals->unlock_lock_type;
1235                 *unlock = 1;
1236                 /* Check if unlock includes more than one lock range */
1237         } else if (flock->fl_type == F_RDLCK) {
1238                 cifs_dbg(FYI, "F_RDLCK\n");
1239                 *type |= server->vals->shared_lock_type;
1240                 *lock = 1;
1241         } else if (flock->fl_type == F_EXLCK) {
1242                 cifs_dbg(FYI, "F_EXLCK\n");
1243                 *type |= server->vals->exclusive_lock_type;
1244                 *lock = 1;
1245         } else if (flock->fl_type == F_SHLCK) {
1246                 cifs_dbg(FYI, "F_SHLCK\n");
1247                 *type |= server->vals->shared_lock_type;
1248                 *lock = 1;
1249         } else
1250                 cifs_dbg(FYI, "Unknown type of lock\n");
1251 }
1252
/*
 * Handle F_GETLK: report whether a lock of the requested type/range
 * could be set. For mandatory-style servers this may require actually
 * taking and immediately releasing a probe lock on the server.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* local cache answered (rc == 0) - no server round trip */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	/* mandatory style: consult the local cache first */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	/* probe: try to take the lock on the server, then release it */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	/* a shared request that failed means some lock blocks it */
	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive failed - probe again with a shared lock to classify */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1320
1321 void
1322 cifs_move_llist(struct list_head *source, struct list_head *dest)
1323 {
1324         struct list_head *li, *tmp;
1325         list_for_each_safe(li, tmp, source)
1326                 list_move(li, dest);
1327 }
1328
1329 void
1330 cifs_free_llist(struct list_head *llist)
1331 {
1332         struct cifsLockInfo *li, *tmp;
1333         list_for_each_entry_safe(li, tmp, llist, llist) {
1334                 cifs_del_lock_waiters(li);
1335                 list_del(&li->llist);
1336                 kfree(li);
1337         }
1338 }
1339
1340 int
1341 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1342                   unsigned int xid)
1343 {
1344         int rc = 0, stored_rc;
1345         int types[] = {LOCKING_ANDX_LARGE_FILES,
1346                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1347         unsigned int i;
1348         unsigned int max_num, num, max_buf;
1349         LOCKING_ANDX_RANGE *buf, *cur;
1350         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1351         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1352         struct cifsLockInfo *li, *tmp;
1353         __u64 length = 1 + flock->fl_end - flock->fl_start;
1354         struct list_head tmp_llist;
1355
1356         INIT_LIST_HEAD(&tmp_llist);
1357
1358         /*
1359          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1360          * and check it for zero before using.
1361          */
1362         max_buf = tcon->ses->server->maxBuf;
1363         if (!max_buf)
1364                 return -EINVAL;
1365
1366         max_num = (max_buf - sizeof(struct smb_hdr)) /
1367                                                 sizeof(LOCKING_ANDX_RANGE);
1368         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1369         if (!buf)
1370                 return -ENOMEM;
1371
1372         down_write(&cinode->lock_sem);
1373         for (i = 0; i < 2; i++) {
1374                 cur = buf;
1375                 num = 0;
1376                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1377                         if (flock->fl_start > li->offset ||
1378                             (flock->fl_start + length) <
1379                             (li->offset + li->length))
1380                                 continue;
1381                         if (current->tgid != li->pid)
1382                                 continue;
1383                         if (types[i] != li->type)
1384                                 continue;
1385                         if (cinode->can_cache_brlcks) {
1386                                 /*
1387                                  * We can cache brlock requests - simply remove
1388                                  * a lock from the file's list.
1389                                  */
1390                                 list_del(&li->llist);
1391                                 cifs_del_lock_waiters(li);
1392                                 kfree(li);
1393                                 continue;
1394                         }
1395                         cur->Pid = cpu_to_le16(li->pid);
1396                         cur->LengthLow = cpu_to_le32((u32)li->length);
1397                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1398                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1399                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1400                         /*
1401                          * We need to save a lock here to let us add it again to
1402                          * the file's list if the unlock range request fails on
1403                          * the server.
1404                          */
1405                         list_move(&li->llist, &tmp_llist);
1406                         if (++num == max_num) {
1407                                 stored_rc = cifs_lockv(xid, tcon,
1408                                                        cfile->fid.netfid,
1409                                                        li->type, num, 0, buf);
1410                                 if (stored_rc) {
1411                                         /*
1412                                          * We failed on the unlock range
1413                                          * request - add all locks from the tmp
1414                                          * list to the head of the file's list.
1415                                          */
1416                                         cifs_move_llist(&tmp_llist,
1417                                                         &cfile->llist->locks);
1418                                         rc = stored_rc;
1419                                 } else
1420                                         /*
1421                                          * The unlock range request succeed -
1422                                          * free the tmp list.
1423                                          */
1424                                         cifs_free_llist(&tmp_llist);
1425                                 cur = buf;
1426                                 num = 0;
1427                         } else
1428                                 cur++;
1429                 }
1430                 if (num) {
1431                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1432                                                types[i], num, 0, buf);
1433                         if (stored_rc) {
1434                                 cifs_move_llist(&tmp_llist,
1435                                                 &cfile->llist->locks);
1436                                 rc = stored_rc;
1437                         } else
1438                                 cifs_free_llist(&tmp_llist);
1439                 }
1440         }
1441
1442         up_write(&cinode->lock_sem);
1443         kfree(buf);
1444         return rc;
1445 }
1446
/*
 * Handle F_SETLK/F_SETLKW (and flock emulation): set or clear the
 * byte-range lock described by @flock, using posix semantics when
 * posix_lck is true and mandatory (LOCKING_ANDX) semantics otherwise.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = cfile->dentry->d_inode;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0: cached locally, done; rc < 0: hard error */
		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      current->tgid, flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		/* rc == 0: cached locally; rc < 0: conflict; rc == 1: ask server */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_I(inode)->clientCanCacheAll &&
					CIFS_I(inode)->clientCanCacheRead) {
			cifs_invalidate_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->clientCanCacheRead = false;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			/* server refused the lock - drop the local copy too */
			kfree(lock);
			return rc;
		}

		/* server accepted - record the lock in the local cache */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX)
		posix_lock_file_wait(file, flock);
	return rc;
}
1526
/*
 * VFS ->lock entry point for cifs: parse the request, decide between
 * posix and mandatory lock semantics, then dispatch to cifs_getlk or
 * cifs_setlk.
 */
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	/* decode the request into type/lock/unlock/wait_flag */
	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);

	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	/* posix brlocks need unix extensions and must not be disabled */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
1585
1586 /*
1587  * update the file size (if needed) after a write. Should be called with
1588  * the inode->i_lock held
1589  */
1590 void
1591 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1592                       unsigned int bytes_written)
1593 {
1594         loff_t end_of_write = offset + bytes_written;
1595
1596         if (end_of_write > cifsi->server_eof)
1597                 cifsi->server_eof = end_of_write;
1598 }
1599
/*
 * Write @write_size bytes from @write_data to the server through the open
 * handle @open_file, starting at file position *@offset.
 *
 * The data is sent in wsize-sized chunks; each chunk is retried on -EAGAIN
 * (reopening an invalidated handle first if needed).  On success *@offset
 * is advanced past the bytes written, and the cached server EOF and local
 * i_size are updated under inode->i_lock.
 *
 * Returns the total number of bytes written, or a negative errno only when
 * the very first chunk fails outright.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
           size_t write_size, loff_t *offset)
{
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        struct dentry *dentry = open_file->dentry;
        struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
        struct cifs_io_parms io_parms;

        cifs_sb = CIFS_SB(dentry->d_sb);

        cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
                 write_size, *offset, dentry->d_name.name);

        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        /* the protocol dialect must provide a synchronous write op */
        if (!server->ops->sync_write)
                return -ENOSYS;

        xid = get_xid();

        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
                while (rc == -EAGAIN) {
                        struct kvec iov[2];
                        unsigned int len;

                        if (open_file->invalidHandle) {
                                /* we could deadlock if we called
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to
                                   server now */
                                rc = cifs_reopen_file(open_file, false);
                                if (rc != 0)
                                        break;
                        }

                        /* cap each request at the negotiated write size */
                        len = min((size_t)cifs_sb->wsize,
                                  write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
                        iov[1].iov_len = len;
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = len;
                        rc = server->ops->sync_write(xid, open_file, &io_parms,
                                                     &bytes_written, iov, 1);
                }
                if (rc || (bytes_written == 0)) {
                        if (total_written)
                                break;  /* report the partial count below */
                        else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        /* i_lock protects server_eof (and, below, i_size) */
                        spin_lock(&dentry->d_inode->i_lock);
                        cifs_update_eof(cifsi, *offset, bytes_written);
                        spin_unlock(&dentry->d_inode->i_lock);
                        *offset += bytes_written;
                }
        }

        cifs_stats_bytes_written(tcon, total_written);

        if (total_written > 0) {
                spin_lock(&dentry->d_inode->i_lock);
                if (*offset > dentry->d_inode->i_size)
                        i_size_write(dentry->d_inode, *offset);
                spin_unlock(&dentry->d_inode->i_lock);
        }
        mark_inode_dirty_sync(dentry->d_inode);
        free_xid(xid);
        return total_written;
}
1684
1685 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1686                                         bool fsuid_only)
1687 {
1688         struct cifsFileInfo *open_file = NULL;
1689         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1690
1691         /* only filter by fsuid on multiuser mounts */
1692         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1693                 fsuid_only = false;
1694
1695         spin_lock(&cifs_file_list_lock);
1696         /* we could simply get the first_list_entry since write-only entries
1697            are always at the end of the list but since the first entry might
1698            have a close pending, we go through the whole list */
1699         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1700                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1701                         continue;
1702                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1703                         if (!open_file->invalidHandle) {
1704                                 /* found a good file */
1705                                 /* lock it so it will not be closed on us */
1706                                 cifsFileInfo_get_locked(open_file);
1707                                 spin_unlock(&cifs_file_list_lock);
1708                                 return open_file;
1709                         } /* else might as well continue, and look for
1710                              another, or simply have the caller reopen it
1711                              again rather than trying to fix this handle */
1712                 } else /* write only file */
1713                         break; /* write only files are last so must be done */
1714         }
1715         spin_unlock(&cifs_file_list_lock);
1716         return NULL;
1717 }
1718
/*
 * Find an open handle for @cifs_inode that was opened for writing,
 * preferring one opened by the current task (tgid) and, on multiuser
 * mounts, one owned by the current fsuid.  A valid handle is returned
 * with an extra reference held; the caller must drop it with
 * cifsFileInfo_put().  If only invalidated handles exist, try to reopen
 * one (bounded by MAX_REOPEN_ATT attempts) before giving up.
 */
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc;
        unsigned int refind = 0;

        /* Having a null inode here (because mapping->host was set to zero by
        the VFS or MM) should not happen but we had reports of on oops (due to
        it being zero) during stress testcases so we need to check for it */

        if (cifs_inode == NULL) {
                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
                dump_stack();
                return NULL;
        }

        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_file_list_lock);
refind_writable:
        /* bound the number of reopen attempts so we can't loop forever */
        if (refind > MAX_REOPEN_ATT) {
                spin_unlock(&cifs_file_list_lock);
                return NULL;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                /* first pass considers only handles opened by this task */
                if (!any_available && open_file->pid != current->tgid)
                        continue;
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get_locked(open_file);
                                spin_unlock(&cifs_file_list_lock);
                                return open_file;
                        } else {
                                /* remember the first invalidated candidate
                                   in case no valid handle turns up */
                                if (!inv_file)
                                        inv_file = open_file;
                        }
                }
        }
        /* couldn't find useable FH with same pid, try any available */
        if (!any_available) {
                any_available = true;
                goto refind_writable;
        }

        if (inv_file) {
                any_available = false;
                /* pin the candidate before dropping the list lock */
                cifsFileInfo_get_locked(inv_file);
        }

        spin_unlock(&cifs_file_list_lock);

        if (inv_file) {
                rc = cifs_reopen_file(inv_file, false);
                if (!rc)
                        return inv_file;
                else {
                        /* reopen failed: demote the stale handle to the
                           tail of the list and drop our reference */
                        spin_lock(&cifs_file_list_lock);
                        list_move_tail(&inv_file->flist,
                                        &cifs_inode->openFileList);
                        spin_unlock(&cifs_file_list_lock);
                        cifsFileInfo_put(inv_file);
                        spin_lock(&cifs_file_list_lock);
                        ++refind;
                        /* clear inv_file: the put above may have freed the
                           handle, so the rescan below must not pick the old
                           pointer up again (would be a use-after-free) */
                        inv_file = NULL;
                        goto refind_writable;
                }
        }

        return NULL;
}
1799
1800 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1801 {
1802         struct address_space *mapping = page->mapping;
1803         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1804         char *write_data;
1805         int rc = -EFAULT;
1806         int bytes_written = 0;
1807         struct inode *inode;
1808         struct cifsFileInfo *open_file;
1809
1810         if (!mapping || !mapping->host)
1811                 return -EFAULT;
1812
1813         inode = page->mapping->host;
1814
1815         offset += (loff_t)from;
1816         write_data = kmap(page);
1817         write_data += from;
1818
1819         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1820                 kunmap(page);
1821                 return -EIO;
1822         }
1823
1824         /* racing with truncate? */
1825         if (offset > mapping->host->i_size) {
1826                 kunmap(page);
1827                 return 0; /* don't care */
1828         }
1829
1830         /* check to make sure that we are not extending the file */
1831         if (mapping->host->i_size - offset < (loff_t)to)
1832                 to = (unsigned)(mapping->host->i_size - offset);
1833
1834         open_file = find_writable_file(CIFS_I(mapping->host), false);
1835         if (open_file) {
1836                 bytes_written = cifs_write(open_file, open_file->pid,
1837                                            write_data, to - from, &offset);
1838                 cifsFileInfo_put(open_file);
1839                 /* Does mm or vfs already set times? */
1840                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1841                 if ((bytes_written > 0) && (offset))
1842                         rc = 0;
1843                 else if (bytes_written < 0)
1844                         rc = bytes_written;
1845         } else {
1846                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1847                 rc = -EIO;
1848         }
1849
1850         kunmap(page);
1851         return rc;
1852 }
1853
/*
 * Write back dirty pages of @mapping according to @wbc.
 *
 * Dirty pages are gathered into runs of consecutive, lockable pages (up
 * to wsize bytes) and each run is sent to the server as one asynchronous
 * write.  Falls back to generic_writepages() (one page at a time) when
 * the negotiated wsize is smaller than a page.
 */
static int cifs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
        bool done = false, scanned = false, range_whole = false;
        pgoff_t end, index;
        struct cifs_writedata *wdata;
        struct TCP_Server_Info *server;
        struct page *page;
        int rc = 0;

        /*
         * If wsize is smaller than the page cache size, default to writing
         * one page at a time via cifs_writepage
         */
        if (cifs_sb->wsize < PAGE_CACHE_SIZE)
                return generic_writepages(mapping, wbc);

        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = true;
                scanned = true;
        }
retry:
        while (!done && index <= end) {
                unsigned int i, nr_pages, found_pages;
                pgoff_t next = 0, tofind;
                struct page **pages;

                /* never ask for more pages than fit in one wsize request */
                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
                                end - index) + 1;

                wdata = cifs_writedata_alloc((unsigned int)tofind,
                                             cifs_writev_complete);
                if (!wdata) {
                        rc = -ENOMEM;
                        break;
                }

                /*
                 * find_get_pages_tag seems to return a max of 256 on each
                 * iteration, so we must call it several times in order to
                 * fill the array or the wsize is effectively limited to
                 * 256 * PAGE_CACHE_SIZE.
                 */
                found_pages = 0;
                pages = wdata->pages;
                do {
                        nr_pages = find_get_pages_tag(mapping, &index,
                                                        PAGECACHE_TAG_DIRTY,
                                                        tofind, pages);
                        found_pages += nr_pages;
                        tofind -= nr_pages;
                        pages += nr_pages;
                } while (nr_pages && tofind && index <= end);

                if (found_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        break;
                }

                /* keep only the leading run of consecutive, writable pages */
                nr_pages = 0;
                for (i = 0; i < found_pages; i++) {
                        page = wdata->pages[i];
                        /*
                         * At this point we hold neither mapping->tree_lock nor
                         * lock on the page itself: the page may be truncated or
                         * invalidated (changing page->mapping to NULL), or even
                         * swizzled back from swapper_space to tmpfs file
                         * mapping
                         */

                        if (nr_pages == 0)
                                lock_page(page);
                        else if (!trylock_page(page))
                                break;

                        if (unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                break;
                        }

                        if (!wbc->range_cyclic && page->index > end) {
                                done = true;
                                unlock_page(page);
                                break;
                        }

                        if (next && (page->index != next)) {
                                /* Not next consecutive page */
                                unlock_page(page);
                                break;
                        }

                        if (wbc->sync_mode != WB_SYNC_NONE)
                                wait_on_page_writeback(page);

                        if (PageWriteback(page) ||
                                        !clear_page_dirty_for_io(page)) {
                                unlock_page(page);
                                break;
                        }

                        /*
                         * This actually clears the dirty bit in the radix tree.
                         * See cifs_writepage() for more commentary.
                         */
                        set_page_writeback(page);

                        if (page_offset(page) >= i_size_read(mapping->host)) {
                                done = true;
                                unlock_page(page);
                                end_page_writeback(page);
                                break;
                        }

                        wdata->pages[i] = page;
                        next = page->index + 1;
                        ++nr_pages;
                }

                /* reset index to refind any pages skipped */
                if (nr_pages == 0)
                        index = wdata->pages[0]->index + 1;

                /* put any pages we aren't going to use */
                for (i = nr_pages; i < found_pages; i++) {
                        page_cache_release(wdata->pages[i]);
                        wdata->pages[i] = NULL;
                }

                /* nothing to write? */
                if (nr_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        continue;
                }

                /* the last page may extend past EOF: shorten its byte count */
                wdata->sync_mode = wbc->sync_mode;
                wdata->nr_pages = nr_pages;
                wdata->offset = page_offset(wdata->pages[0]);
                wdata->pagesz = PAGE_CACHE_SIZE;
                wdata->tailsz =
                        min(i_size_read(mapping->host) -
                            page_offset(wdata->pages[nr_pages - 1]),
                            (loff_t)PAGE_CACHE_SIZE);
                wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
                                        wdata->tailsz;

                /* grab a fresh writable handle for each attempt; resend on
                   -EAGAIN only for data-integrity (WB_SYNC_ALL) writeback */
                do {
                        if (wdata->cfile != NULL)
                                cifsFileInfo_put(wdata->cfile);
                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
                                                          false);
                        if (!wdata->cfile) {
                                cifs_dbg(VFS, "No writable handles for inode\n");
                                rc = -EBADF;
                                break;
                        }
                        wdata->pid = wdata->cfile->pid;
                        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
                        rc = server->ops->async_writev(wdata);
                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

                for (i = 0; i < nr_pages; ++i)
                        unlock_page(wdata->pages[i]);

                /* send failure -- clean up the mess */
                if (rc != 0) {
                        for (i = 0; i < nr_pages; ++i) {
                                if (rc == -EAGAIN)
                                        redirty_page_for_writepage(wbc,
                                                           wdata->pages[i]);
                                else
                                        SetPageError(wdata->pages[i]);
                                end_page_writeback(wdata->pages[i]);
                                page_cache_release(wdata->pages[i]);
                        }
                        if (rc != -EAGAIN)
                                mapping_set_error(mapping, rc);
                }
                kref_put(&wdata->refcount, cifs_writedata_release);

                wbc->nr_to_write -= nr_pages;
                if (wbc->nr_to_write <= 0)
                        done = true;

                index = next;
        }

        if (!scanned && !done) {
                /*
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
                scanned = true;
                index = 0;
                goto retry;
        }

        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;

        return rc;
}
2063
2064 static int
2065 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2066 {
2067         int rc;
2068         unsigned int xid;
2069
2070         xid = get_xid();
2071 /* BB add check for wbc flags */
2072         page_cache_get(page);
2073         if (!PageUptodate(page))
2074                 cifs_dbg(FYI, "ppw - page not up to date\n");
2075
2076         /*
2077          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2078          *
2079          * A writepage() implementation always needs to do either this,
2080          * or re-dirty the page with "redirty_page_for_writepage()" in
2081          * the case of a failure.
2082          *
2083          * Just unlocking the page will cause the radix tree tag-bits
2084          * to fail to update with the state of the page correctly.
2085          */
2086         set_page_writeback(page);
2087 retry_write:
2088         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2089         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2090                 goto retry_write;
2091         else if (rc == -EAGAIN)
2092                 redirty_page_for_writepage(wbc, page);
2093         else if (rc != 0)
2094                 SetPageError(page);
2095         else
2096                 SetPageUptodate(page);
2097         end_page_writeback(page);
2098         page_cache_release(page);
2099         free_xid(xid);
2100         return rc;
2101 }
2102
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int rc;

        /* write the locked page out, then release the page lock */
        rc = cifs_writepage_locked(page, wbc);
        unlock_page(page);
        return rc;
}
2109
/*
 * .write_end address_space operation: commit @copied bytes placed in
 * @page by write_begin.
 *
 * If the page is up to date, the data is just marked dirty for later
 * writeback; otherwise the copied range is written to the server
 * synchronously through the file's own handle.  Extends i_size when the
 * write grows the file.  Drops the page lock and reference taken by
 * write_begin.  Returns the number of bytes committed, or a negative
 * errno from the synchronous write path.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata)
{
        int rc;
        struct inode *inode = mapping->host;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        __u32 pid;

        /* forward the opener's pid to the server if so mounted */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = cfile->pid;
        else
                pid = current->tgid;

        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
                 page, pos, copied);

        if (PageChecked(page)) {
                /* write_begin flagged this page: it is up to date only if
                   the whole requested range was actually copied */
                if (copied == len)
                        SetPageUptodate(page);
                ClearPageChecked(page);
        } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
                SetPageUptodate(page);

        if (!PageUptodate(page)) {
                char *page_data;
                unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
                unsigned int xid;

                xid = get_xid();
                /* this is probably better than directly calling
                   partialpage_write since in this function the file handle is
                   known which we might as well leverage */
                /* BB check if anything else missing out of ppw
                   such as updating last write time */
                page_data = kmap(page);
                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
                /* if (rc < 0) should we set writebehind rc? */
                kunmap(page);

                free_xid(xid);
        } else {
                rc = copied;
                pos += copied;
                set_page_dirty(page);
        }

        if (rc > 0) {
                spin_lock(&inode->i_lock);
                if (pos > inode->i_size)
                        i_size_write(inode, pos);
                spin_unlock(&inode->i_lock);
        }

        unlock_page(page);
        page_cache_release(page);

        return rc;
}
2170
/*
 * fsync for files opened with strict cache semantics: flush dirty pages
 * in [start, end], drop the page cache when we no longer hold a read
 * oplock (so subsequent reads revalidate against the server), then ask
 * the server to flush the file unless the mount disabled server-side
 * sync (CIFS_MOUNT_NOSSYNC).  Takes i_mutex for the duration.
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
                      int datasync)
{
        unsigned int xid;
        int rc = 0;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
        struct inode *inode = file_inode(file);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
        mutex_lock(&inode->i_mutex);

        xid = get_xid();

        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
                 file->f_path.dentry->d_name.name, datasync);

        if (!CIFS_I(inode)->clientCanCacheRead) {
                /* no read oplock: cached pages may be stale, toss them */
                rc = cifs_invalidate_mapping(inode);
                if (rc) {
                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
                        rc = 0; /* don't care about it in fsync */
                }
        }

        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
                if (server->ops->flush)
                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
                else
                        rc = -ENOSYS;
        }

        free_xid(xid);
        mutex_unlock(&inode->i_mutex);
        return rc;
}
2213
/*
 * Regular fsync: flush dirty pages in [start, end] to the server, then
 * ask the server to flush its copy of the file unless the mount disabled
 * server-side sync (CIFS_MOUNT_NOSSYNC).  Unlike cifs_strict_fsync, the
 * page cache is left intact.  Takes i_mutex for the duration.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
        unsigned int xid;
        int rc = 0;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        struct inode *inode = file->f_mapping->host;

        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
        mutex_lock(&inode->i_mutex);

        xid = get_xid();

        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
                 file->f_path.dentry->d_name.name, datasync);

        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
                if (server->ops->flush)
                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
                else
                        rc = -ENOSYS;
        }

        free_xid(xid);
        mutex_unlock(&inode->i_mutex);
        return rc;
}
2247
2248 /*
2249  * As file closes, flush all cached write data for this inode checking
2250  * for write behind errors.
2251  */
2252 int cifs_flush(struct file *file, fl_owner_t id)
2253 {
2254         struct inode *inode = file_inode(file);
2255         int rc = 0;
2256
2257         if (file->f_mode & FMODE_WRITE)
2258                 rc = filemap_write_and_wait(inode->i_mapping);
2259
2260         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2261
2262         return rc;
2263 }
2264
2265 static int
2266 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2267 {
2268         int rc = 0;
2269         unsigned long i;
2270
2271         for (i = 0; i < num_pages; i++) {
2272                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2273                 if (!pages[i]) {
2274                         /*
2275                          * save number of pages we have already allocated and
2276                          * return with ENOMEM error
2277                          */
2278                         num_pages = i;
2279                         rc = -ENOMEM;
2280                         break;
2281                 }
2282         }
2283
2284         if (rc) {
2285                 for (i = 0; i < num_pages; i++)
2286                         put_page(pages[i]);
2287         }
2288         return rc;
2289 }
2290
2291 static inline
2292 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2293 {
2294         size_t num_pages;
2295         size_t clen;
2296
2297         clen = min_t(const size_t, len, wsize);
2298         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2299
2300         if (cur_len)
2301                 *cur_len = clen;
2302
2303         return num_pages;
2304 }
2305
/*
 * Completion work for an uncached async write: update the cached server
 * EOF and i_size under i_lock, signal any waiter on wdata->done, release
 * the data pages (unless the write will be retried on -EAGAIN, in which
 * case the pages are still needed), and drop this work's wdata reference.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
        int i;
        struct cifs_writedata *wdata = container_of(work,
                                        struct cifs_writedata, work);
        struct inode *inode = wdata->cfile->dentry->d_inode;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);

        spin_lock(&inode->i_lock);
        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
        if (cifsi->server_eof > inode->i_size)
                i_size_write(inode, cifsi->server_eof);
        spin_unlock(&inode->i_lock);

        complete(&wdata->done);

        if (wdata->result != -EAGAIN) {
                /* no retry pending: the page references can be dropped */
                for (i = 0; i < wdata->nr_pages; i++)
                        put_page(wdata->pages[i]);
        }

        kref_put(&wdata->refcount, cifs_writedata_release);
}
2330
2331 /* attempt to send write to server, retry on any -EAGAIN errors */
2332 static int
2333 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2334 {
2335         int rc;
2336         struct TCP_Server_Info *server;
2337
2338         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2339
2340         do {
2341                 if (wdata->cfile->invalidHandle) {
2342                         rc = cifs_reopen_file(wdata->cfile, false);
2343                         if (rc != 0)
2344                                 continue;
2345                 }
2346                 rc = server->ops->async_writev(wdata);
2347         } while (rc == -EAGAIN);
2348
2349         return rc;
2350 }
2351
static ssize_t
cifs_iovec_write(struct file *file, const struct iovec *iov,
		 unsigned long nr_segs, loff_t *poffset)
{
	unsigned long nr_pages, i;
	size_t bytes, copied, len, cur_len;
	ssize_t total_written = 0;
	loff_t offset;
	struct iov_iter it;
	struct cifsFileInfo *open_file;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_writedata *wdata, *tmp;
	struct list_head wdata_list;
	int rc;
	pid_t pid;

	/*
	 * Uncached write path: copy the user iovec into freshly allocated
	 * pages, send the pages to the server in wsize-sized async write
	 * requests, then wait for the replies in order of increasing offset.
	 */
	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	rc = generic_write_checks(file, poffset, &len, 0);
	if (rc)
		return rc;

	INIT_LIST_HEAD(&wdata_list);
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	offset = *poffset;

	/* forward the opener's pid to the server if the mount asks for it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	iov_iter_init(&it, iov, nr_segs, len, 0);
	do {
		size_t save_len;

		nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
		wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
		if (rc) {
			kfree(wdata);
			break;
		}

		save_len = cur_len;
		for (i = 0; i < nr_pages; i++) {
			bytes = min_t(const size_t, cur_len, PAGE_SIZE);
			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
							 0, bytes);
			cur_len -= copied;
			iov_iter_advance(&it, copied);
			/*
			 * If we didn't copy as much as we expected, then that
			 * may mean we trod into an unmapped area. Stop copying
			 * at that point. On the next pass through the big
			 * loop, we'll likely end up getting a zero-length
			 * write and bailing out of it.
			 */
			if (copied < bytes)
				break;
		}
		/* cur_len becomes the number of bytes actually copied */
		cur_len = save_len - cur_len;

		/*
		 * If we have no data to send, then that probably means that
		 * the copy above failed altogether. That's most likely because
		 * the address in the iovec was bogus. Set the rc to -EFAULT,
		 * free anything we allocated and bail out.
		 */
		if (!cur_len) {
			for (i = 0; i < nr_pages; i++)
				put_page(wdata->pages[i]);
			kfree(wdata);
			rc = -EFAULT;
			break;
		}

		/*
		 * i + 1 now represents the number of pages we actually used in
		 * the copy phase above. Bring nr_pages down to that, and free
		 * any pages that we didn't use.
		 */
		for ( ; nr_pages > i + 1; nr_pages--)
			put_page(wdata->pages[nr_pages - 1]);

		/* wdata takes its own reference on the open file */
		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		rc = cifs_uncached_retry_writev(wdata);
		if (rc) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		list_add_tail(&wdata->list, &wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&wdata_list))
		rc = 0;

	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit or we get a fatal signal
	 * while waiting, then return without waiting for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
		if (!rc) {
			/* FIXME: freezable too? */
			rc = wait_for_completion_killable(&wdata->done);
			if (rc)
				rc = -EINTR;
			else if (wdata->result)
				rc = wdata->result;
			else
				total_written += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				rc = cifs_uncached_retry_writev(wdata);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_writedata_release);
	}

	if (total_written > 0)
		*poffset += total_written;

	cifs_stats_bytes_written(tcon, total_written);
	/* report bytes written if any; otherwise the first error seen */
	return total_written ? total_written : (ssize_t)rc;
}
2512
2513 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2514                                 unsigned long nr_segs, loff_t pos)
2515 {
2516         ssize_t written;
2517         struct inode *inode;
2518
2519         inode = file_inode(iocb->ki_filp);
2520
2521         /*
2522          * BB - optimize the way when signing is disabled. We can drop this
2523          * extra memory-to-memory copying and use iovec buffers for constructing
2524          * write request.
2525          */
2526
2527         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2528         if (written > 0) {
2529                 CIFS_I(inode)->invalid_mapping = true;
2530                 iocb->ki_pos = pos;
2531         }
2532
2533         return written;
2534 }
2535
static ssize_t
cifs_writev(struct kiocb *iocb, const struct iovec *iov,
	    unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc = -EACCES;

	BUG_ON(iocb->ki_pos != pos);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);
	/* a conflicting mandatory brlock leaves rc at -EACCES */
	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
				     server->vals->exclusive_lock_type, NULL,
				     CIFS_WRITE_OP)) {
		mutex_lock(&inode->i_mutex);
		rc = __generic_file_aio_write(iocb, iov, nr_segs,
					       &iocb->ki_pos);
		mutex_unlock(&inode->i_mutex);
	}

	if (rc > 0 || rc == -EIOCBQUEUED) {
		ssize_t err;

		/* honour O_SYNC/IS_SYNC semantics for the written range */
		err = generic_write_sync(file, pos, rc);
		if (err < 0 && rc > 0)
			rc = err;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
2574
ssize_t
cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
		   unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/*
	 * With a full (write-caching) oplock we may use the cached path:
	 * generic write when POSIX byte-range locks are in force, otherwise
	 * cifs_writev() which checks for conflicting mandatory brlocks.
	 */
	if (cinode->clientCanCacheAll) {
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
			return generic_file_aio_write(iocb, iov, nr_segs, pos);
		return cifs_writev(iocb, iov, nr_segs, pos);
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, iov, nr_segs, pos);
	if (written > 0 && cinode->clientCanCacheRead) {
		/*
		 * Windows 7 server can delay breaking level2 oplock if a write
		 * request comes - break it on the client to prevent reading
		 * an old data.
		 */
		cifs_invalidate_mapping(inode);
		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
			 inode);
		cinode->clientCanCacheRead = false;
	}
	return written;
}
2614
2615 static struct cifs_readdata *
2616 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2617 {
2618         struct cifs_readdata *rdata;
2619
2620         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2621                         GFP_KERNEL);
2622         if (rdata != NULL) {
2623                 kref_init(&rdata->refcount);
2624                 INIT_LIST_HEAD(&rdata->list);
2625                 init_completion(&rdata->done);
2626                 INIT_WORK(&rdata->work, complete);
2627         }
2628
2629         return rdata;
2630 }
2631
2632 void
2633 cifs_readdata_release(struct kref *refcount)
2634 {
2635         struct cifs_readdata *rdata = container_of(refcount,
2636                                         struct cifs_readdata, refcount);
2637
2638         if (rdata->cfile)
2639                 cifsFileInfo_put(rdata->cfile);
2640
2641         kfree(rdata);
2642 }
2643
2644 static int
2645 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2646 {
2647         int rc = 0;
2648         struct page *page;
2649         unsigned int i;
2650
2651         for (i = 0; i < nr_pages; i++) {
2652                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2653                 if (!page) {
2654                         rc = -ENOMEM;
2655                         break;
2656                 }
2657                 rdata->pages[i] = page;
2658         }
2659
2660         if (rc) {
2661                 for (i = 0; i < nr_pages; i++) {
2662                         put_page(rdata->pages[i]);
2663                         rdata->pages[i] = NULL;
2664                 }
2665         }
2666         return rc;
2667 }
2668
2669 static void
2670 cifs_uncached_readdata_release(struct kref *refcount)
2671 {
2672         struct cifs_readdata *rdata = container_of(refcount,
2673                                         struct cifs_readdata, refcount);
2674         unsigned int i;
2675
2676         for (i = 0; i < rdata->nr_pages; i++) {
2677                 put_page(rdata->pages[i]);
2678                 rdata->pages[i] = NULL;
2679         }
2680         cifs_readdata_release(refcount);
2681 }
2682
2683 static int
2684 cifs_retry_async_readv(struct cifs_readdata *rdata)
2685 {
2686         int rc;
2687         struct TCP_Server_Info *server;
2688
2689         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2690
2691         do {
2692                 if (rdata->cfile->invalidHandle) {
2693                         rc = cifs_reopen_file(rdata->cfile, true);
2694                         if (rc != 0)
2695                                 continue;
2696                 }
2697                 rc = server->ops->async_readv(rdata);
2698         } while (rc == -EAGAIN);
2699
2700         return rc;
2701 }
2702
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iov:        vector in which we should copy the data
 * @nr_segs:    number of segments in vector
 * @offset:     offset into file of the first iovec
 * @copied:     used to return the amount of data copied to the iov
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
static ssize_t
cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
			unsigned long nr_segs, loff_t offset, ssize_t *copied)
{
	int rc = 0;
	struct iov_iter ii;
	/* this response's position relative to the start of the whole read */
	size_t pos = rdata->offset - offset;
	ssize_t remaining = rdata->bytes;
	unsigned char *pdata;
	unsigned int i;

	/* set up iov_iter and advance to the correct offset */
	iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
	iov_iter_advance(&ii, pos);

	*copied = 0;
	for (i = 0; i < rdata->nr_pages; i++) {
		ssize_t copy;
		struct page *page = rdata->pages[i];

		/* copy a whole page or whatever's left */
		copy = min_t(ssize_t, remaining, PAGE_SIZE);

		/* ...but limit it to whatever space is left in the iov */
		copy = min_t(ssize_t, copy, iov_iter_count(&ii));

		/* go while there's data to be copied and no errors */
		if (copy && !rc) {
			pdata = kmap(page);
			rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
						(int)copy);
			kunmap(page);
			if (!rc) {
				*copied += copy;
				remaining -= copy;
				iov_iter_advance(&ii, copy);
			}
		}
	}

	return rc;
}
2757
2758 static void
2759 cifs_uncached_readv_complete(struct work_struct *work)
2760 {
2761         struct cifs_readdata *rdata = container_of(work,
2762                                                 struct cifs_readdata, work);
2763
2764         complete(&rdata->done);
2765         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2766 }
2767
/*
 * Receive up to "len" bytes from the socket into rdata's pages: fill whole
 * pages first, zero-pad the trailing partial page, and release any pages the
 * response did not cover.
 */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int total_read = 0, result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_SIZE;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			len -= PAGE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		total_read += result;
	}

	/* return bytes read unless nothing was read or a retry is needed */
	return total_read > 0 && result != -EAGAIN ? total_read : result;
}
2815
2816 static ssize_t
2817 cifs_iovec_read(struct file *file, const struct iovec *iov,
2818                  unsigned long nr_segs, loff_t *poffset)
2819 {
2820         ssize_t rc;
2821         size_t len, cur_len;
2822         ssize_t total_read = 0;
2823         loff_t offset = *poffset;
2824         unsigned int npages;
2825         struct cifs_sb_info *cifs_sb;
2826         struct cifs_tcon *tcon;
2827         struct cifsFileInfo *open_file;
2828         struct cifs_readdata *rdata, *tmp;
2829         struct list_head rdata_list;
2830         pid_t pid;
2831
2832         if (!nr_segs)
2833                 return 0;
2834
2835         len = iov_length(iov, nr_segs);
2836         if (!len)
2837                 return 0;
2838
2839         INIT_LIST_HEAD(&rdata_list);
2840         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2841         open_file = file->private_data;
2842         tcon = tlink_tcon(open_file->tlink);
2843
2844         if (!tcon->ses->server->ops->async_readv)
2845                 return -ENOSYS;
2846
2847         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2848                 pid = open_file->pid;
2849         else
2850                 pid = current->tgid;
2851
2852         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2853                 cifs_dbg(FYI, "attempting read on write only file instance\n");
2854
2855         do {
2856                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2857                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2858
2859                 /* allocate a readdata struct */
2860                 rdata = cifs_readdata_alloc(npages,
2861                                             cifs_uncached_readv_complete);
2862                 if (!rdata) {
2863                         rc = -ENOMEM;
2864                         goto error;
2865                 }
2866
2867                 rc = cifs_read_allocate_pages(rdata, npages);
2868                 if (rc)
2869                         goto error;
2870
2871                 rdata->cfile = cifsFileInfo_get(open_file);
2872                 rdata->nr_pages = npages;
2873                 rdata->offset = offset;
2874                 rdata->bytes = cur_len;
2875                 rdata->pid = pid;
2876                 rdata->pagesz = PAGE_SIZE;
2877                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2878
2879                 rc = cifs_retry_async_readv(rdata);
2880 error:
2881                 if (rc) {
2882                         kref_put(&rdata->refcount,
2883                                  cifs_uncached_readdata_release);
2884                         break;
2885                 }
2886
2887                 list_add_tail(&rdata->list, &rdata_list);
2888                 offset += cur_len;
2889                 len -= cur_len;
2890         } while (len > 0);
2891
2892         /* if at least one read request send succeeded, then reset rc */
2893         if (!list_empty(&rdata_list))
2894                 rc = 0;
2895
2896         /* the loop below should proceed in the order of increasing offsets */
2897 restart_loop:
2898         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2899                 if (!rc) {
2900                         ssize_t copied;
2901
2902                         /* FIXME: freezable sleep too? */
2903                         rc = wait_for_completion_killable(&rdata->done);
2904                         if (rc)
2905                                 rc = -EINTR;
2906                         else if (rdata->result)
2907                                 rc = rdata->result;
2908                         else {
2909                                 rc = cifs_readdata_to_iov(rdata, iov,
2910                                                         nr_segs, *poffset,
2911                                                         &copied);
2912                                 total_read += copied;
2913                         }
2914
2915                         /* resend call if it's a retryable error */
2916                         if (rc == -EAGAIN) {
2917                                 rc = cifs_retry_async_readv(rdata);
2918                                 goto restart_loop;
2919                         }
2920                 }
2921                 list_del_init(&rdata->list);
2922                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2923         }
2924
2925         cifs_stats_bytes_read(tcon, total_read);
2926         *poffset += total_read;
2927
2928         /* mask nodata case */
2929         if (rc == -ENODATA)
2930                 rc = 0;
2931
2932         return total_read ? total_read : rc;
2933 }
2934
2935 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2936                                unsigned long nr_segs, loff_t pos)
2937 {
2938         ssize_t read;
2939
2940         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2941         if (read > 0)
2942                 iocb->ki_pos = pos;
2943
2944         return read;
2945 }
2946
ssize_t
cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
		  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!cinode->clientCanCacheRead)
		return cifs_user_readv(iocb, iov, nr_segs, pos);

	/* POSIX byte-range locking in force: cached read path is safe */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_aio_read(iocb, iov, nr_segs, pos);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
	up_read(&cinode->lock_sem);
	return rc;
}
2987
/* synchronous read loop used by the non-async read path */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* forward the opener's pid to the server if the mount asks for it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	/* issue reads of at most rsize bytes until the request is satisfied */
	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		current_read_size = min_t(uint, read_size - total_read, rsize);
		/*
		 * For windows me and 9x we do not want to request more than it
		 * negotiated since it will refuse the read then.
		 */
		if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
			current_read_size = min_t(uint, current_read_size,
					CIFSMaxBufSize);
		}
		rc = -EAGAIN;
		/* retry after transparently reopening a stale file handle */
		while (rc == -EAGAIN) {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, open_file, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		}
		if (rc || (bytes_read == 0)) {
			/* error or EOF: return data read so far, if any */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
3076
3077 /*
3078  * If the page is mmap'ed into a process' page tables, then we need to make
3079  * sure that it doesn't change while being written back.
3080  */
3081 static int
3082 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3083 {
3084         struct page *page = vmf->page;
3085
3086         lock_page(page);
3087         return VM_FAULT_LOCKED;
3088 }
3089
/* vm operations for cifs mmaps: generic fault path plus our mkwrite hook */
static struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = cifs_page_mkwrite,
	.remap_pages = generic_file_remap_pages,
};
3095
3096 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3097 {
3098         int rc, xid;
3099         struct inode *inode = file_inode(file);
3100
3101         xid = get_xid();
3102
3103         if (!CIFS_I(inode)->clientCanCacheRead) {
3104                 rc = cifs_invalidate_mapping(inode);
3105                 if (rc)
3106                         return rc;
3107         }
3108
3109         rc = generic_file_mmap(file, vma);
3110         if (rc == 0)
3111                 vma->vm_ops = &cifs_file_vm_ops;
3112         free_xid(xid);
3113         return rc;
3114 }
3115
3116 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3117 {
3118         int rc, xid;
3119
3120         xid = get_xid();
3121         rc = cifs_revalidate_file(file);
3122         if (rc) {
3123                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3124                          rc);
3125                 free_xid(xid);
3126                 return rc;
3127         }
3128         rc = generic_file_mmap(file, vma);
3129         if (rc == 0)
3130                 vma->vm_ops = &cifs_file_vm_ops;
3131         free_xid(xid);
3132         return rc;
3133 }
3134
/*
 * Completion handler for readpages: for each page, re-add it to the LRU,
 * mark it uptodate on success, unlock it, and drop the pagecache reference
 * taken when the read was queued.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add_file(page);

		if (rdata->result == 0) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		/* only push data read successfully into the fscache layer */
		if (rdata->result == 0)
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		page_cache_release(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
3162
/*
 * Pull "len" bytes of read-response data off the socket into rdata's pages.
 * Pages past the data are either zero-filled and marked uptodate (when they
 * lie beyond the server's apparent EOF) or released outright; released slots
 * are NULLed in rdata->pages and nr_pages is reduced accordingly.
 * Returns the total bytes read when > 0 (unless the socket read ended with
 * -EAGAIN), otherwise the last socket-read result.
 */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int total_read = 0, result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->tailsz = PAGE_CACHE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_CACHE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_CACHE_SIZE;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			len -= PAGE_CACHE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len,
				'\0', PAGE_CACHE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_CACHE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		/* read directly from the socket into the mapped page */
		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		total_read += result;
	}

	return total_read > 0 && result != -EAGAIN ? total_read : result;
}
3238
/*
 * ->readpages() implementation: try fscache first, then group contiguous
 * pages from @page_list into batches no larger than the negotiated rsize
 * and issue one async read request per batch. Pages are added to the page
 * cache here; unlocking/uptodate handling happens in cifs_readv_complete.
 * Returns 0, or the first error encountered while building/issuing batches.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	unsigned int rsize = cifs_sb->rsize;
	pid_t pid;

	/*
	 * Give up immediately if rsize is too small to read an entire page.
	 * The VFS will fall back to readpage. We should never reach this
	 * point however since we set ra_pages to 0 when the rsize is smaller
	 * than a cache page.
	 */
	if (unlikely(rsize < PAGE_CACHE_SIZE))
		return 0;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	/* forward the opener's pid to the server if the mount requests it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	INIT_LIST_HEAD(&tmplist);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i;
		unsigned int bytes = PAGE_CACHE_SIZE;
		unsigned int expected_index;
		unsigned int nr_pages = 1;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;

		page = list_entry(page_list->prev, struct page, lru);

		/*
		 * Lock the page and put it in the cache. Since no one else
		 * should have access to this page, we're safe to simply set
		 * PG_locked without checking it first.
		 */
		__set_page_locked(page);
		rc = add_to_page_cache_locked(page, mapping,
					      page->index, GFP_KERNEL);

		/* give up if we can't stick it in the cache */
		if (rc) {
			__clear_page_locked(page);
			break;
		}

		/* move first page to the tmplist */
		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
		list_move_tail(&page->lru, &tmplist);

		/* now try and add more pages onto the request */
		expected_index = page->index + 1;
		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
			/* discontinuity ? */
			if (page->index != expected_index)
				break;

			/* would this page push the read over the rsize? */
			if (bytes + PAGE_CACHE_SIZE > rsize)
				break;

			__set_page_locked(page);
			if (add_to_page_cache_locked(page, mapping,
						page->index, GFP_KERNEL)) {
				__clear_page_locked(page);
				break;
			}
			list_move_tail(&page->lru, &tmplist);
			bytes += PAGE_CACHE_SIZE;
			expected_index++;
			nr_pages++;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			rc = -ENOMEM;
			break;
		}

		/* rdata takes its own reference on the open file */
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_CACHE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = cifs_retry_async_readv(rdata);
		if (rc != 0) {
			/* dispatch failed: unwind the pages ourselves */
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our ref; completion work holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	return rc;
}
3385
/*
 * Read one page of data at *poffset into @page: try fscache first, then
 * fall back to a synchronous cifs_read() from the server. The caller holds
 * the page lock; this function does not unlock the page.
 * Returns 0 on success or a negative error code.
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	/* paired with the page_cache_release() in the exit path below */
	page_cache_get(page);
	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	file_inode(file)->i_atime =
		current_fs_time(file_inode(file)->i_sb);

	/* zero the tail of a short read so stale data is not exposed */
	if (PAGE_CACHE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	/* cifs_read() returned a byte count; callers expect 0 for success */
	rc = 0;

io_error:
	kunmap(page);
	page_cache_release(page);

read_complete:
	return rc;
}
3429
3430 static int cifs_readpage(struct file *file, struct page *page)
3431 {
3432         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3433         int rc = -EACCES;
3434         unsigned int xid;
3435
3436         xid = get_xid();
3437
3438         if (file->private_data == NULL) {
3439                 rc = -EBADF;
3440                 free_xid(xid);
3441                 return rc;
3442         }
3443
3444         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3445                  page, (int)offset, (int)offset);
3446
3447         rc = cifs_readpage_worker(file, page, &offset);
3448
3449         unlock_page(page);
3450
3451         free_xid(xid);
3452         return rc;
3453 }
3454
3455 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3456 {
3457         struct cifsFileInfo *open_file;
3458
3459         spin_lock(&cifs_file_list_lock);
3460         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3461                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3462                         spin_unlock(&cifs_file_list_lock);
3463                         return 1;
3464                 }
3465         }
3466         spin_unlock(&cifs_file_list_lock);
3467         return 0;
3468 }
3469
3470 /* We do not want to update the file size from server for inodes
3471    open for write - to avoid races with writepage extending
3472    the file - in the future we could consider allowing
3473    refreshing the inode only on increases in the file size
3474    but this is tricky to do without racing with writebehind
3475    page caching in the current Linux kernel design */
3476 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3477 {
3478         if (!cifsInode)
3479                 return true;
3480
3481         if (is_inode_writable(cifsInode)) {
3482                 /* This inode is open for write at least once */
3483                 struct cifs_sb_info *cifs_sb;
3484
3485                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3486                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3487                         /* since no page cache to corrupt on directio
3488                         we can change size safely */
3489                         return true;
3490                 }
3491
3492                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3493                         return true;
3494
3495                 return false;
3496         } else
3497                 return true;
3498 }
3499
/*
 * ->write_begin() implementation: locate/create and lock the page covering
 * @pos and return it in *pagep for the caller to copy data into. The page
 * is read from the server only when the write is partial, we lack a read
 * oplock optimization, and the file is open for reading; otherwise the
 * non-written parts are zeroed or left for cifs_write_end to handle via a
 * sync write.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_CACHE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_I(mapping->host)->clientCanCacheRead) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_CACHE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	/* returned page is locked (or NULL on -ENOMEM) */
	*pagep = page;
	return rc;
}
3571
3572 static int cifs_release_page(struct page *page, gfp_t gfp)
3573 {
3574         if (PagePrivate(page))
3575                 return 0;
3576
3577         return cifs_fscache_release_page(page, gfp);
3578 }
3579
3580 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3581 {
3582         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3583
3584         if (offset == 0)
3585                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3586 }
3587
/*
 * ->launder_page() implementation: synchronously write back this one dirty
 * page (WB_SYNC_ALL over exactly the page's byte range) and invalidate any
 * fscache copy. Returns the writeback result, or 0 if the page was clean.
 */
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
3608
/*
 * Work handler run when the server breaks our oplock on a file. Downgrades
 * the client's cached state: drops read caching when mandatory byte-range
 * locks are held, breaks any local lease, flushes dirty pages (and, when
 * read caching is lost, waits for the flush and invalidates the mapping),
 * pushes cached byte-range locks to the server, and finally acknowledges
 * the break to the server unless it was cancelled (e.g. after reconnect).
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/*
	 * mandatory locks cannot be serviced from the page cache, so give
	 * up read caching entirely in that case
	 */
	if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->clientCanCacheRead = false;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (cinode->clientCanCacheRead)
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (cinode->clientCanCacheRead == 0) {
			/* losing read caching: wait for writes and drop pages */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_invalidate_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
}
3655
/* Address-space operations for cifs inodes (page cache <-> server I/O). */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
3668
3669 /*
3670  * cifs_readpages requires the server to support a buffer large enough to
3671  * contain the header plus one complete page of data.  Otherwise, we need
3672  * to leave cifs_readpages out of the address space operations.
3673  */
/* Same as cifs_addr_ops but with .readpages deliberately omitted. */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};