/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

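/*
 * Map the POSIX open(2) access mode to the NT access mask sent in the
 * SMB open request.
 */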
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access-denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

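/*
 * Translate POSIX open flags (O_CREAT, O_EXCL, O_TRUNC, ...) to their
 * SMB_O_* equivalents for the POSIX open/create call used when the
 * server supports the CIFS Unix extensions.
 */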
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

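/*
 * Derive the CIFS create disposition from the POSIX open flags; see the
 * open flag mapping table in cifs_nt_open() below.
 */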
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

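/*
 * Open a file via the POSIX create call provided by the CIFS Unix
 * extensions. On success, optionally instantiates or updates the inode
 * in *pinode from the FILE_UNIX_BASIC_INFO the server returned.
 */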
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

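/*
 * Open a file the traditional (non-POSIX) way: convert the POSIX flags
 * to an NT desired access and create disposition, issue the open through
 * the server's ->open op and then refresh the inode metadata.
 */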
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the FILE_SUPERSEDE
 *      disposition (ie create whether or not the file exists);
 *      O_CREAT | O_TRUNC is similar but truncates the existing file
 *      rather than creating a new file as FILE_SUPERSEDE does (which
 *      uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, fid, oplock, buf,
                               cifs_sb);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

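/*
 * Return true if any open instance of this inode holds mandatory
 * byte-range locks; used to decide whether a read oplock can be kept.
 */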
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

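/*
 * Allocate and initialize the per-open cifsFileInfo, link it into the
 * tcon and inode open-file lists, apply any oplock received while the
 * open was pending and attach the result to file->private_data.
 */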
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (oplock == server->vals->oplock_read &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if readable, put the file instance first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

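/*
 * The ->open op for regular files. Tries a POSIX open first when the
 * server supports the Unix extensions, falling back to the traditional
 * NT open path on network or DFS errors.
 */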
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode which we could not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to push them */
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_write(&cinode->lock_sem);
        return rc;
}

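/*
 * Reopen a file whose handle was invalidated, e.g. after a session
 * reconnect. With @can_flush set, dirty pages are written back and the
 * inode metadata is refreshed from the server before the locks are
 * re-pushed via cifs_relock_file().
 */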
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here; we can never tell if the caller already has the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by CIFSSMBOpen and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server version of the file size can be
         * stale. If we knew for sure that the inode was not dirty locally we
         * could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if we
         * have data that would invalidate the current end of file on the
         * server we can not go to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

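/*
 * The ->release op for directories: close an in-progress
 * FindFirst/FindNext search handle on the server and free the cached
 * search buffer and private data.
 */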
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

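/*
 * Allocate and initialize a cifsLockInfo describing a byte-range lock
 * owned by the current thread group.
 */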
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

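/*
 * Send all cached byte-range locks for this open file to the server as
 * LOCKING_ANDX requests, batching up to max_num ranges per call and
 * making one pass per lock type (exclusive, then shared).
 */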
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        unlock_flocks();

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        unlock_flocks();

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

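/*
 * Decode a struct file_lock into the lock type, lock/unlock operation
 * and wait flag used by the CIFS locking paths, logging any flags or
 * lock types we do not handle.
 */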
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

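/*
 * Handle F_GETLK: test for a conflicting lock, first against the locally
 * cached locks and then, if necessary, by probing the server with a
 * temporary lock/unlock pair.
 */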
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

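/*
 * Handle an unlock request: remove matching cached locks and, when they
 * have already been sent to the server, batch LOCKING_ANDX unlock
 * requests, restoring the locks to the file's list on failure.
 */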
1341 int
1342 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1343                   unsigned int xid)
1344 {
1345         int rc = 0, stored_rc;
1346         int types[] = {LOCKING_ANDX_LARGE_FILES,
1347                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1348         unsigned int i;
1349         unsigned int max_num, num, max_buf;
1350         LOCKING_ANDX_RANGE *buf, *cur;
1351         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1352         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1353         struct cifsLockInfo *li, *tmp;
1354         __u64 length = 1 + flock->fl_end - flock->fl_start;
1355         struct list_head tmp_llist;
1356
1357         INIT_LIST_HEAD(&tmp_llist);
1358
1359         /*
1360          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1361          * and check it for zero before using.
1362          */
1363         max_buf = tcon->ses->server->maxBuf;
1364         if (!max_buf)
1365                 return -EINVAL;
1366
1367         max_num = (max_buf - sizeof(struct smb_hdr)) /
1368                                                 sizeof(LOCKING_ANDX_RANGE);
1369         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1370         if (!buf)
1371                 return -ENOMEM;
1372
1373         down_write(&cinode->lock_sem);
1374         for (i = 0; i < 2; i++) {
1375                 cur = buf;
1376                 num = 0;
1377                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1378                         if (flock->fl_start > li->offset ||
1379                             (flock->fl_start + length) <
1380                             (li->offset + li->length))
1381                                 continue;
1382                         if (current->tgid != li->pid)
1383                                 continue;
1384                         if (types[i] != li->type)
1385                                 continue;
1386                         if (cinode->can_cache_brlcks) {
1387                                 /*
1388                                  * We can cache brlock requests - simply remove
1389                                  * a lock from the file's list.
1390                                  */
1391                                 list_del(&li->llist);
1392                                 cifs_del_lock_waiters(li);
1393                                 kfree(li);
1394                                 continue;
1395                         }
1396                         cur->Pid = cpu_to_le16(li->pid);
1397                         cur->LengthLow = cpu_to_le32((u32)li->length);
1398                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1399                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1400                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1401                         /*
1402                          * We need to save a lock here to let us add it again to
1403                          * the file's list if the unlock range request fails on
1404                          * the server.
1405                          */
1406                         list_move(&li->llist, &tmp_llist);
1407                         if (++num == max_num) {
1408                                 stored_rc = cifs_lockv(xid, tcon,
1409                                                        cfile->fid.netfid,
1410                                                        li->type, num, 0, buf);
1411                                 if (stored_rc) {
1412                                         /*
1413                                          * We failed on the unlock range
1414                                          * request - add all locks from the tmp
1415                                          * list to the head of the file's list.
1416                                          */
1417                                         cifs_move_llist(&tmp_llist,
1418                                                         &cfile->llist->locks);
1419                                         rc = stored_rc;
1420                                 } else
1421                                         /*
1422                                          * The unlock range request succeeded -
1423                                          * free the tmp list.
1424                                          */
1425                                         cifs_free_llist(&tmp_llist);
1426                                 cur = buf;
1427                                 num = 0;
1428                         } else
1429                                 cur++;
1430                 }
1431                 if (num) {
1432                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1433                                                types[i], num, 0, buf);
1434                         if (stored_rc) {
1435                                 cifs_move_llist(&tmp_llist,
1436                                                 &cfile->llist->locks);
1437                                 rc = stored_rc;
1438                         } else
1439                                 cifs_free_llist(&tmp_llist);
1440                 }
1441         }
1442
1443         up_write(&cinode->lock_sem);
1444         kfree(buf);
1445         return rc;
1446 }
1447
1448 static int
1449 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1450            bool wait_flag, bool posix_lck, int lock, int unlock,
1451            unsigned int xid)
1452 {
1453         int rc = 0;
1454         __u64 length = 1 + flock->fl_end - flock->fl_start;
1455         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1456         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1457         struct TCP_Server_Info *server = tcon->ses->server;
1458         struct inode *inode = cfile->dentry->d_inode;
1459
1460         if (posix_lck) {
1461                 int posix_lock_type;
1462
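                     /*
                      * Try to satisfy the lock request locally first; only a
                      * positive return falls through here, meaning the lock
                      * could not be cached and must be sent to the server.
                      */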
1463                 rc = cifs_posix_lock_set(file, flock);
1464                 if (rc <= 0)
1465                         return rc;
1466
1467                 if (type & server->vals->shared_lock_type)
1468                         posix_lock_type = CIFS_RDLCK;
1469                 else
1470                         posix_lock_type = CIFS_WRLCK;
1471
1472                 if (unlock == 1)
1473                         posix_lock_type = CIFS_UNLCK;
1474
1475                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1476                                       current->tgid, flock->fl_start, length,
1477                                       NULL, posix_lock_type, wait_flag);
1478                 goto out;
1479         }
1480
1481         if (lock) {
1482                 struct cifsLockInfo *lock;
1483
1484                 lock = cifs_lock_init(flock->fl_start, length, type);
1485                 if (!lock)
1486                         return -ENOMEM;
1487
1488                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1489                 if (rc < 0) {
1490                         kfree(lock);
1491                         return rc;
1492                 }
1493                 if (!rc)
1494                         goto out;
1495
1496                 /*
1497                  * Windows 7 server can delay breaking lease from read to None
1498                  * if we set a byte-range lock on a file - break it explicitly
1499                  * before sending the lock to the server to be sure the next
1500                  * read won't conflict with non-overlapped locks due to
1501                  * page-granularity reads.
1502                  */
1503                 if (!CIFS_I(inode)->clientCanCacheAll &&
1504                                         CIFS_I(inode)->clientCanCacheRead) {
1505                         cifs_invalidate_mapping(inode);
1506                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1507                                  inode);
1508                         CIFS_I(inode)->clientCanCacheRead = false;
1509                 }
1510
1511                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1512                                             type, 1, 0, wait_flag);
1513                 if (rc) {
1514                         kfree(lock);
1515                         return rc;
1516                 }
1517
1518                 cifs_lock_add(cfile, lock);
1519         } else if (unlock)
1520                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1521
1522 out:
1523         if (flock->fl_flags & FL_POSIX)
1524                 posix_lock_file_wait(file, flock);
1525         return rc;
1526 }
1527
1528 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1529 {
1530         int rc, xid;
1531         int lock = 0, unlock = 0;
1532         bool wait_flag = false;
1533         bool posix_lck = false;
1534         struct cifs_sb_info *cifs_sb;
1535         struct cifs_tcon *tcon;
1536         struct cifsInodeInfo *cinode;
1537         struct cifsFileInfo *cfile;
1538         __u16 netfid;
1539         __u32 type;
1540
1541         rc = -EACCES;
1542         xid = get_xid();
1543
1544         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1545                  cmd, flock->fl_flags, flock->fl_type,
1546                  flock->fl_start, flock->fl_end);
1547
1548         cfile = (struct cifsFileInfo *)file->private_data;
1549         tcon = tlink_tcon(cfile->tlink);
1550
1551         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1552                         tcon->ses->server);
1553
1554         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1555         netfid = cfile->fid.netfid;
1556         cinode = CIFS_I(file_inode(file));
1557
1558         if (cap_unix(tcon->ses) &&
1559             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1560             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1561                 posix_lck = true;
1562         /*
1563          * BB add code here to normalize offset and length to account for
1564          * negative length which we can not accept over the wire.
1565          */
1566         if (IS_GETLK(cmd)) {
1567                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1568                 free_xid(xid);
1569                 return rc;
1570         }
1571
1572         if (!lock && !unlock) {
1573                 /*
1574                  * the request is neither a lock nor an unlock - we do not
1575                  * know what to do with it
1576                  */
1577                 free_xid(xid);
1578                 return -EOPNOTSUPP;
1579         }
1580
1581         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1582                         xid);
1583         free_xid(xid);
1584         return rc;
1585 }
1586
1587 /*
1588  * Update the file size (if needed) after a write. Should be called with
1589  * the inode->i_lock held.
1590  */
1591 void
1592 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1593                       unsigned int bytes_written)
1594 {
1595         loff_t end_of_write = offset + bytes_written;
1596
1597         if (end_of_write > cifsi->server_eof)
1598                 cifsi->server_eof = end_of_write;
1599 }
1600
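     /*
      * Write write_size bytes from write_data at *offset through the given
      * open handle, splitting the request into wsize-sized chunks and
      * retrying a chunk for as long as the server returns -EAGAIN (reopening
      * the handle first if a reconnect has invalidated it).
      */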
1601 static ssize_t
1602 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1603            size_t write_size, loff_t *offset)
1604 {
1605         int rc = 0;
1606         unsigned int bytes_written = 0;
1607         unsigned int total_written;
1608         struct cifs_sb_info *cifs_sb;
1609         struct cifs_tcon *tcon;
1610         struct TCP_Server_Info *server;
1611         unsigned int xid;
1612         struct dentry *dentry = open_file->dentry;
1613         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1614         struct cifs_io_parms io_parms;
1615
1616         cifs_sb = CIFS_SB(dentry->d_sb);
1617
1618         cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
1619                  write_size, *offset, dentry->d_name.name);
1620
1621         tcon = tlink_tcon(open_file->tlink);
1622         server = tcon->ses->server;
1623
1624         if (!server->ops->sync_write)
1625                 return -ENOSYS;
1626
1627         xid = get_xid();
1628
1629         for (total_written = 0; write_size > total_written;
1630              total_written += bytes_written) {
1631                 rc = -EAGAIN;
1632                 while (rc == -EAGAIN) {
1633                         struct kvec iov[2];
1634                         unsigned int len;
1635
1636                         if (open_file->invalidHandle) {
1637                                 /* we could deadlock if we called
1638                                    filemap_fdatawait from here, so tell
1639                                    cifs_reopen_file not to flush data to
1640                                    the server now */
1641                                 rc = cifs_reopen_file(open_file, false);
1642                                 if (rc != 0)
1643                                         break;
1644                         }
1645
1646                         len = min((size_t)cifs_sb->wsize,
1647                                   write_size - total_written);
1648                         /* iov[0] is reserved for smb header */
1649                         iov[1].iov_base = (char *)write_data + total_written;
1650                         iov[1].iov_len = len;
1651                         io_parms.pid = pid;
1652                         io_parms.tcon = tcon;
1653                         io_parms.offset = *offset;
1654                         io_parms.length = len;
1655                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1656                                                      &bytes_written, iov, 1);
1657                 }
1658                 if (rc || (bytes_written == 0)) {
1659                         if (total_written)
1660                                 break;
1661                         else {
1662                                 free_xid(xid);
1663                                 return rc;
1664                         }
1665                 } else {
1666                         spin_lock(&dentry->d_inode->i_lock);
1667                         cifs_update_eof(cifsi, *offset, bytes_written);
1668                         spin_unlock(&dentry->d_inode->i_lock);
1669                         *offset += bytes_written;
1670                 }
1671         }
1672
1673         cifs_stats_bytes_written(tcon, total_written);
1674
1675         if (total_written > 0) {
1676                 spin_lock(&dentry->d_inode->i_lock);
1677                 if (*offset > dentry->d_inode->i_size)
1678                         i_size_write(dentry->d_inode, *offset);
1679                 spin_unlock(&dentry->d_inode->i_lock);
1680         }
1681         mark_inode_dirty_sync(dentry->d_inode);
1682         free_xid(xid);
1683         return total_written;
1684 }
1685
1686 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1687                                         bool fsuid_only)
1688 {
1689         struct cifsFileInfo *open_file = NULL;
1690         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1691
1692         /* only filter by fsuid on multiuser mounts */
1693         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1694                 fsuid_only = false;
1695
1696         spin_lock(&cifs_file_list_lock);
1697         /* we could simply take the first list entry, since write-only entries
1698            are always at the end of the list; but the first entry might have
1699            a close pending, so we walk the whole list */
1700         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1701                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1702                         continue;
1703                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1704                         if (!open_file->invalidHandle) {
1705                                 /* found a good file */
1706                                 /* lock it so it will not be closed on us */
1707                                 cifsFileInfo_get_locked(open_file);
1708                                 spin_unlock(&cifs_file_list_lock);
1709                                 return open_file;
1710                         } /* else might as well continue, and look for
1711                              another, or simply have the caller reopen it
1712                              again rather than trying to fix this handle */
1713                 } else /* write only file */
1714                         break; /* write only files are last so must be done */
1715         }
1716         spin_unlock(&cifs_file_list_lock);
1717         return NULL;
1718 }
1719
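     /*
      * Find a writable handle for the inode: prefer a valid handle opened by
      * the calling process, then any valid handle, and finally try to reopen
      * an invalidated one, giving up after MAX_REOPEN_ATT attempts.
      */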
1720 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1721                                         bool fsuid_only)
1722 {
1723         struct cifsFileInfo *open_file, *inv_file = NULL;
1724         struct cifs_sb_info *cifs_sb;
1725         bool any_available = false;
1726         int rc;
1727         unsigned int refind = 0;
1728
1729         /* Having a null inode here (because mapping->host was set to zero by
1730         the VFS or MM) should not happen, but we had reports of an oops (due to
1731         it being zero) during stress testcases, so we need to check for it */
1732
1733         if (cifs_inode == NULL) {
1734                 cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1735                 dump_stack();
1736                 return NULL;
1737         }
1738
1739         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1740
1741         /* only filter by fsuid on multiuser mounts */
1742         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1743                 fsuid_only = false;
1744
1745         spin_lock(&cifs_file_list_lock);
1746 refind_writable:
1747         if (refind > MAX_REOPEN_ATT) {
1748                 spin_unlock(&cifs_file_list_lock);
1749                 return NULL;
1750         }
1751         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1752                 if (!any_available && open_file->pid != current->tgid)
1753                         continue;
1754                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1755                         continue;
1756                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1757                         if (!open_file->invalidHandle) {
1758                                 /* found a good writable file */
1759                                 cifsFileInfo_get_locked(open_file);
1760                                 spin_unlock(&cifs_file_list_lock);
1761                                 return open_file;
1762                         } else {
1763                                 if (!inv_file)
1764                                         inv_file = open_file;
1765                         }
1766                 }
1767         }
1768         /* couldn't find a usable FH with the same pid, try any available */
1769         if (!any_available) {
1770                 any_available = true;
1771                 goto refind_writable;
1772         }
1773
1774         if (inv_file) {
1775                 any_available = false;
1776                 cifsFileInfo_get_locked(inv_file);
1777         }
1778
1779         spin_unlock(&cifs_file_list_lock);
1780
1781         if (inv_file) {
1782                 rc = cifs_reopen_file(inv_file, false);
1783                 if (!rc)
1784                         return inv_file;
1785                 else {
1786                         spin_lock(&cifs_file_list_lock);
1787                         list_move_tail(&inv_file->flist,
1788                                         &cifs_inode->openFileList);
1789                         spin_unlock(&cifs_file_list_lock);
1790                         cifsFileInfo_put(inv_file);
1791                         spin_lock(&cifs_file_list_lock);
1792                         ++refind;
1793                         goto refind_writable;
1794                 }
1795         }
1796
1797         return NULL;
1798 }
1799
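     /*
      * Write the dirty range [from, to) of a page back to the server through
      * any writable handle for the inode, clamping the range so the write
      * never extends the file.
      */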
1800 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1801 {
1802         struct address_space *mapping = page->mapping;
1803         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1804         char *write_data;
1805         int rc = -EFAULT;
1806         int bytes_written = 0;
1807         struct inode *inode;
1808         struct cifsFileInfo *open_file;
1809
1810         if (!mapping || !mapping->host)
1811                 return -EFAULT;
1812
1813         inode = page->mapping->host;
1814
1815         offset += (loff_t)from;
1816         write_data = kmap(page);
1817         write_data += from;
1818
1819         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1820                 kunmap(page);
1821                 return -EIO;
1822         }
1823
1824         /* racing with truncate? */
1825         if (offset > mapping->host->i_size) {
1826                 kunmap(page);
1827                 return 0; /* don't care */
1828         }
1829
1830         /* check to make sure that we are not extending the file */
1831         if (mapping->host->i_size - offset < (loff_t)to)
1832                 to = (unsigned)(mapping->host->i_size - offset);
1833
1834         open_file = find_writable_file(CIFS_I(mapping->host), false);
1835         if (open_file) {
1836                 bytes_written = cifs_write(open_file, open_file->pid,
1837                                            write_data, to - from, &offset);
1838                 cifsFileInfo_put(open_file);
1839                 /* Does mm or vfs already set times? */
1840                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1841                 if ((bytes_written > 0) && (offset))
1842                         rc = 0;
1843                 else if (bytes_written < 0)
1844                         rc = bytes_written;
1845         } else {
1846                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1847                 rc = -EIO;
1848         }
1849
1850         kunmap(page);
1851         return rc;
1852 }
1853
1854 static int cifs_writepages(struct address_space *mapping,
1855                            struct writeback_control *wbc)
1856 {
1857         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1858         bool done = false, scanned = false, range_whole = false;
1859         pgoff_t end, index;
1860         struct cifs_writedata *wdata;
1861         struct TCP_Server_Info *server;
1862         struct page *page;
1863         int rc = 0;
1864
1865         /*
1866          * If wsize is smaller than the page cache size, default to writing
1867          * one page at a time via cifs_writepage
1868          */
1869         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1870                 return generic_writepages(mapping, wbc);
1871
1872         if (wbc->range_cyclic) {
1873                 index = mapping->writeback_index; /* Start from prev offset */
1874                 end = -1;
1875         } else {
1876                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1877                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1878                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1879                         range_whole = true;
1880                 scanned = true;
1881         }
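     /*
      * Writeback loop: gather a run of contiguous dirty pages into a single
      * wdata and hand it to the server's async_writev; a run ends at the
      * first non-contiguous, already-under-writeback or beyond-EOF page.
      */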
1882 retry:
1883         while (!done && index <= end) {
1884                 unsigned int i, nr_pages, found_pages;
1885                 pgoff_t next = 0, tofind;
1886                 struct page **pages;
1887
1888                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1889                                 end - index) + 1;
1890
1891                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1892                                              cifs_writev_complete);
1893                 if (!wdata) {
1894                         rc = -ENOMEM;
1895                         break;
1896                 }
1897
1898                 /*
1899                  * find_get_pages_tag seems to return a max of 256 on each
1900                  * iteration, so we must call it several times in order to
1901                  * fill the array or the wsize is effectively limited to
1902                  * 256 * PAGE_CACHE_SIZE.
1903                  */
1904                 found_pages = 0;
1905                 pages = wdata->pages;
1906                 do {
1907                         nr_pages = find_get_pages_tag(mapping, &index,
1908                                                         PAGECACHE_TAG_DIRTY,
1909                                                         tofind, pages);
1910                         found_pages += nr_pages;
1911                         tofind -= nr_pages;
1912                         pages += nr_pages;
1913                 } while (nr_pages && tofind && index <= end);
1914
1915                 if (found_pages == 0) {
1916                         kref_put(&wdata->refcount, cifs_writedata_release);
1917                         break;
1918                 }
1919
1920                 nr_pages = 0;
1921                 for (i = 0; i < found_pages; i++) {
1922                         page = wdata->pages[i];
1923                         /*
1924                          * At this point we hold neither mapping->tree_lock nor
1925                          * lock on the page itself: the page may be truncated or
1926                          * invalidated (changing page->mapping to NULL), or even
1927                          * swizzled back from swapper_space to tmpfs file
1928                          * mapping
1929                          */
1930
1931                         if (nr_pages == 0)
1932                                 lock_page(page);
1933                         else if (!trylock_page(page))
1934                                 break;
1935
1936                         if (unlikely(page->mapping != mapping)) {
1937                                 unlock_page(page);
1938                                 break;
1939                         }
1940
1941                         if (!wbc->range_cyclic && page->index > end) {
1942                                 done = true;
1943                                 unlock_page(page);
1944                                 break;
1945                         }
1946
1947                         if (next && (page->index != next)) {
1948                                 /* Not next consecutive page */
1949                                 unlock_page(page);
1950                                 break;
1951                         }
1952
1953                         if (wbc->sync_mode != WB_SYNC_NONE)
1954                                 wait_on_page_writeback(page);
1955
1956                         if (PageWriteback(page) ||
1957                                         !clear_page_dirty_for_io(page)) {
1958                                 unlock_page(page);
1959                                 break;
1960                         }
1961
1962                         /*
1963                          * This actually clears the dirty bit in the radix tree.
1964                          * See cifs_writepage() for more commentary.
1965                          */
1966                         set_page_writeback(page);
1967
1968                         if (page_offset(page) >= i_size_read(mapping->host)) {
1969                                 done = true;
1970                                 unlock_page(page);
1971                                 end_page_writeback(page);
1972                                 break;
1973                         }
1974
1975                         wdata->pages[i] = page;
1976                         next = page->index + 1;
1977                         ++nr_pages;
1978                 }
1979
1980                 /* reset index to refind any pages skipped */
1981                 if (nr_pages == 0)
1982                         index = wdata->pages[0]->index + 1;
1983
1984                 /* put any pages we aren't going to use */
1985                 for (i = nr_pages; i < found_pages; i++) {
1986                         page_cache_release(wdata->pages[i]);
1987                         wdata->pages[i] = NULL;
1988                 }
1989
1990                 /* nothing to write? */
1991                 if (nr_pages == 0) {
1992                         kref_put(&wdata->refcount, cifs_writedata_release);
1993                         continue;
1994                 }
1995
1996                 wdata->sync_mode = wbc->sync_mode;
1997                 wdata->nr_pages = nr_pages;
1998                 wdata->offset = page_offset(wdata->pages[0]);
1999                 wdata->pagesz = PAGE_CACHE_SIZE;
2000                 wdata->tailsz =
2001                         min(i_size_read(mapping->host) -
2002                             page_offset(wdata->pages[nr_pages - 1]),
2003                             (loff_t)PAGE_CACHE_SIZE);
2004                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2005                                         wdata->tailsz;
2006
2007                 do {
2008                         if (wdata->cfile != NULL)
2009                                 cifsFileInfo_put(wdata->cfile);
2010                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2011                                                           false);
2012                         if (!wdata->cfile) {
2013                                 cifs_dbg(VFS, "No writable handles for inode\n");
2014                                 rc = -EBADF;
2015                                 break;
2016                         }
2017                         wdata->pid = wdata->cfile->pid;
2018                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2019                         rc = server->ops->async_writev(wdata);
2020                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2021
2022                 for (i = 0; i < nr_pages; ++i)
2023                         unlock_page(wdata->pages[i]);
2024
2025                 /* send failure -- clean up the mess */
2026                 if (rc != 0) {
2027                         for (i = 0; i < nr_pages; ++i) {
2028                                 if (rc == -EAGAIN)
2029                                         redirty_page_for_writepage(wbc,
2030                                                            wdata->pages[i]);
2031                                 else
2032                                         SetPageError(wdata->pages[i]);
2033                                 end_page_writeback(wdata->pages[i]);
2034                                 page_cache_release(wdata->pages[i]);
2035                         }
2036                         if (rc != -EAGAIN)
2037                                 mapping_set_error(mapping, rc);
2038                 }
2039                 kref_put(&wdata->refcount, cifs_writedata_release);
2040
2041                 wbc->nr_to_write -= nr_pages;
2042                 if (wbc->nr_to_write <= 0)
2043                         done = true;
2044
2045                 index = next;
2046         }
2047
2048         if (!scanned && !done) {
2049                 /*
2050                  * We hit the last page and there is more work to be done: wrap
2051                  * back to the start of the file
2052                  */
2053                 scanned = true;
2054                 index = 0;
2055                 goto retry;
2056         }
2057
2058         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2059                 mapping->writeback_index = index;
2060
2061         return rc;
2062 }
2063
2064 static int
2065 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2066 {
2067         int rc;
2068         unsigned int xid;
2069
2070         xid = get_xid();
2071 /* BB add check for wbc flags */
2072         page_cache_get(page);
2073         if (!PageUptodate(page))
2074                 cifs_dbg(FYI, "ppw - page not up to date\n");
2075
2076         /*
2077          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2078          *
2079          * A writepage() implementation always needs to do either this,
2080          * or re-dirty the page with "redirty_page_for_writepage()" in
2081          * the case of a failure.
2082          *
2083          * Just unlocking the page will cause the radix tree tag-bits
2084          * to fail to update with the state of the page correctly.
2085          */
2086         set_page_writeback(page);
2087 retry_write:
2088         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2089         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2090                 goto retry_write;
2091         else if (rc == -EAGAIN)
2092                 redirty_page_for_writepage(wbc, page);
2093         else if (rc != 0)
2094                 SetPageError(page);
2095         else
2096                 SetPageUptodate(page);
2097         end_page_writeback(page);
2098         page_cache_release(page);
2099         free_xid(xid);
2100         return rc;
2101 }
2102
2103 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2104 {
2105         int rc = cifs_writepage_locked(page, wbc);
2106         unlock_page(page);
2107         return rc;
2108 }
2109
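     /*
      * Complete a buffered write: mark the page up to date when the copy
      * covered it, write a still-not-uptodate partial page through to the
      * server synchronously, and extend i_size if the write went past it.
      */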
2110 static int cifs_write_end(struct file *file, struct address_space *mapping,
2111                         loff_t pos, unsigned len, unsigned copied,
2112                         struct page *page, void *fsdata)
2113 {
2114         int rc;
2115         struct inode *inode = mapping->host;
2116         struct cifsFileInfo *cfile = file->private_data;
2117         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2118         __u32 pid;
2119
2120         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2121                 pid = cfile->pid;
2122         else
2123                 pid = current->tgid;
2124
2125         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2126                  page, pos, copied);
2127
2128         if (PageChecked(page)) {
2129                 if (copied == len)
2130                         SetPageUptodate(page);
2131                 ClearPageChecked(page);
2132         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2133                 SetPageUptodate(page);
2134
2135         if (!PageUptodate(page)) {
2136                 char *page_data;
2137                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2138                 unsigned int xid;
2139
2140                 xid = get_xid();
2141                 /* this is probably better than directly calling
2142                    cifs_partialpagewrite, since here the file handle is
2143                    already known and we might as well leverage it */
2144                 /* BB check if anything else is missing out of ppw,
2145                    such as updating the last write time */
2146                 page_data = kmap(page);
2147                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2148                 /* if (rc < 0) should we set writebehind rc? */
2149                 kunmap(page);
2150
2151                 free_xid(xid);
2152         } else {
2153                 rc = copied;
2154                 pos += copied;
2155                 set_page_dirty(page);
2156         }
2157
2158         if (rc > 0) {
2159                 spin_lock(&inode->i_lock);
2160                 if (pos > inode->i_size)
2161                         i_size_write(inode, pos);
2162                 spin_unlock(&inode->i_lock);
2163         }
2164
2165         unlock_page(page);
2166         page_cache_release(page);
2167
2168         return rc;
2169 }
2170
2171 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2172                       int datasync)
2173 {
2174         unsigned int xid;
2175         int rc = 0;
2176         struct cifs_tcon *tcon;
2177         struct TCP_Server_Info *server;
2178         struct cifsFileInfo *smbfile = file->private_data;
2179         struct inode *inode = file_inode(file);
2180         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2181
2182         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2183         if (rc)
2184                 return rc;
2185         mutex_lock(&inode->i_mutex);
2186
2187         xid = get_xid();
2188
2189         cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2190                  file->f_path.dentry->d_name.name, datasync);
2191
2192         if (!CIFS_I(inode)->clientCanCacheRead) {
2193                 rc = cifs_invalidate_mapping(inode);
2194                 if (rc) {
2195                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2196                         rc = 0; /* don't care about it in fsync */
2197                 }
2198         }
2199
2200         tcon = tlink_tcon(smbfile->tlink);
2201         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2202                 server = tcon->ses->server;
2203                 if (server->ops->flush)
2204                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2205                 else
2206                         rc = -ENOSYS;
2207         }
2208
2209         free_xid(xid);
2210         mutex_unlock(&inode->i_mutex);
2211         return rc;
2212 }
2213
2214 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2215 {
2216         unsigned int xid;
2217         int rc = 0;
2218         struct cifs_tcon *tcon;
2219         struct TCP_Server_Info *server;
2220         struct cifsFileInfo *smbfile = file->private_data;
2221         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2222         struct inode *inode = file->f_mapping->host;
2223
2224         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2225         if (rc)
2226                 return rc;
2227         mutex_lock(&inode->i_mutex);
2228
2229         xid = get_xid();
2230
2231         cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2232                  file->f_path.dentry->d_name.name, datasync);
2233
2234         tcon = tlink_tcon(smbfile->tlink);
2235         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2236                 server = tcon->ses->server;
2237                 if (server->ops->flush)
2238                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2239                 else
2240                         rc = -ENOSYS;
2241         }
2242
2243         free_xid(xid);
2244         mutex_unlock(&inode->i_mutex);
2245         return rc;
2246 }
2247
2248 /*
2249  * As the file closes, flush all cached write data for this inode, checking
2250  * for write-behind errors.
2251  */
2252 int cifs_flush(struct file *file, fl_owner_t id)
2253 {
2254         struct inode *inode = file_inode(file);
2255         int rc = 0;
2256
2257         if (file->f_mode & FMODE_WRITE)
2258                 rc = filemap_write_and_wait(inode->i_mapping);
2259
2260         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2261
2262         return rc;
2263 }
2264
2265 static int
2266 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2267 {
2268         int rc = 0;
2269         unsigned long i;
2270
2271         for (i = 0; i < num_pages; i++) {
2272                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2273                 if (!pages[i]) {
2274                         /*
2275                          * save number of pages we have already allocated and
2276                          * return with ENOMEM error
2277                          */
2278                         num_pages = i;
2279                         rc = -ENOMEM;
2280                         break;
2281                 }
2282         }
2283
2284         if (rc) {
2285                 for (i = 0; i < num_pages; i++)
2286                         put_page(pages[i]);
2287         }
2288         return rc;
2289 }
2290
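     /*
      * Work out how many pages are needed to back a write of len bytes,
      * capping the length at wsize and optionally returning the capped
      * length in *cur_len.
      */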
2291 static inline
2292 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2293 {
2294         size_t num_pages;
2295         size_t clen;
2296
2297         clen = min_t(const size_t, len, wsize);
2298         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2299
2300         if (cur_len)
2301                 *cur_len = clen;
2302
2303         return num_pages;
2304 }
2305
2306 static void
2307 cifs_uncached_writev_complete(struct work_struct *work)
2308 {
2309         int i;
2310         struct cifs_writedata *wdata = container_of(work,
2311                                         struct cifs_writedata, work);
2312         struct inode *inode = wdata->cfile->dentry->d_inode;
2313         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2314
2315         spin_lock(&inode->i_lock);
2316         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2317         if (cifsi->server_eof > inode->i_size)
2318                 i_size_write(inode, cifsi->server_eof);
2319         spin_unlock(&inode->i_lock);
2320
2321         complete(&wdata->done);
2322
2323         if (wdata->result != -EAGAIN) {
2324                 for (i = 0; i < wdata->nr_pages; i++)
2325                         put_page(wdata->pages[i]);
2326         }
2327
2328         kref_put(&wdata->refcount, cifs_writedata_release);
2329 }
2330
2331 /* attempt to send write to server, retry on any -EAGAIN errors */
2332 static int
2333 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2334 {
2335         int rc;
2336         struct TCP_Server_Info *server;
2337
2338         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2339
2340         do {
2341                 if (wdata->cfile->invalidHandle) {
2342                         rc = cifs_reopen_file(wdata->cfile, false);
2343                         if (rc != 0)
2344                                 continue;
2345                 }
2346                 rc = server->ops->async_writev(wdata);
2347         } while (rc == -EAGAIN);
2348
2349         return rc;
2350 }
2351
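     /*
      * Uncached write path: copy the user iovecs into freshly allocated
      * pages, send one async write per wsize-sized chunk, then collect the
      * replies below in order of increasing offset.
      */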
2352 static ssize_t
2353 cifs_iovec_write(struct file *file, const struct iovec *iov,
2354                  unsigned long nr_segs, loff_t *poffset)
2355 {
2356         unsigned long nr_pages, i;
2357         size_t copied, len, cur_len;
2358         ssize_t total_written = 0;
2359         loff_t offset;
2360         struct iov_iter it;
2361         struct cifsFileInfo *open_file;
2362         struct cifs_tcon *tcon;
2363         struct cifs_sb_info *cifs_sb;
2364         struct cifs_writedata *wdata, *tmp;
2365         struct list_head wdata_list;
2366         int rc;
2367         pid_t pid;
2368
2369         len = iov_length(iov, nr_segs);
2370         if (!len)
2371                 return 0;
2372
2373         rc = generic_write_checks(file, poffset, &len, 0);
2374         if (rc)
2375                 return rc;
2376
2377         INIT_LIST_HEAD(&wdata_list);
2378         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2379         open_file = file->private_data;
2380         tcon = tlink_tcon(open_file->tlink);
2381
2382         if (!tcon->ses->server->ops->async_writev)
2383                 return -ENOSYS;
2384
2385         offset = *poffset;
2386
2387         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2388                 pid = open_file->pid;
2389         else
2390                 pid = current->tgid;
2391
2392         iov_iter_init(&it, iov, nr_segs, len, 0);
2393         do {
2394                 size_t save_len;
2395
2396                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2397                 wdata = cifs_writedata_alloc(nr_pages,
2398                                              cifs_uncached_writev_complete);
2399                 if (!wdata) {
2400                         rc = -ENOMEM;
2401                         break;
2402                 }
2403
2404                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2405                 if (rc) {
2406                         kfree(wdata);
2407                         break;
2408                 }
2409
2410                 save_len = cur_len;
2411                 for (i = 0; i < nr_pages; i++) {
2412                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2413                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2414                                                          0, copied);
2415                         cur_len -= copied;
2416                         iov_iter_advance(&it, copied);
2417                 }
2418                 cur_len = save_len - cur_len;
2419
2420                 wdata->sync_mode = WB_SYNC_ALL;
2421                 wdata->nr_pages = nr_pages;
2422                 wdata->offset = (__u64)offset;
2423                 wdata->cfile = cifsFileInfo_get(open_file);
2424                 wdata->pid = pid;
2425                 wdata->bytes = cur_len;
2426                 wdata->pagesz = PAGE_SIZE;
2427                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2428                 rc = cifs_uncached_retry_writev(wdata);
2429                 if (rc) {
2430                         kref_put(&wdata->refcount, cifs_writedata_release);
2431                         break;
2432                 }
2433
2434                 list_add_tail(&wdata->list, &wdata_list);
2435                 offset += cur_len;
2436                 len -= cur_len;
2437         } while (len > 0);
2438
2439         /*
2440          * If at least one write was successfully sent, then discard any rc
2441          * value from the later writes. If a later write succeeds, then
2442          * we'll end up returning whatever was written. If it fails, then
2443          * we'll get a new rc value from that.
2444          */
2445         if (!list_empty(&wdata_list))
2446                 rc = 0;
2447
2448         /*
2449          * Wait for and collect replies for any successful sends in order of
2450          * increasing offset. Once an error is hit or we get a fatal signal
2451          * while waiting, then return without waiting for any more replies.
2452          */
2453 restart_loop:
2454         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2455                 if (!rc) {
2456                         /* FIXME: freezable too? */
2457                         rc = wait_for_completion_killable(&wdata->done);
2458                         if (rc)
2459                                 rc = -EINTR;
2460                         else if (wdata->result)
2461                                 rc = wdata->result;
2462                         else
2463                                 total_written += wdata->bytes;
2464
2465                         /* resend call if it's a retryable error */
2466                         if (rc == -EAGAIN) {
2467                                 rc = cifs_uncached_retry_writev(wdata);
2468                                 goto restart_loop;
2469                         }
2470                 }
2471                 list_del_init(&wdata->list);
2472                 kref_put(&wdata->refcount, cifs_writedata_release);
2473         }
2474
2475         if (total_written > 0)
2476                 *poffset += total_written;
2477
2478         cifs_stats_bytes_written(tcon, total_written);
2479         return total_written ? total_written : (ssize_t)rc;
2480 }
2481
2482 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2483                                 unsigned long nr_segs, loff_t pos)
2484 {
2485         ssize_t written;
2486         struct inode *inode;
2487
2488         inode = file_inode(iocb->ki_filp);
2489
2490         /*
2491          * BB - optimize the case when signing is disabled: we could drop this
2492          * extra memory-to-memory copying and use the iovec buffers to construct
2493          * the write request directly.
2494          */
2495
2496         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2497         if (written > 0) {
2498                 CIFS_I(inode)->invalid_mapping = true;
2499                 iocb->ki_pos = pos;
2500         }
2501
2502         return written;
2503 }
2504
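     /*
      * Write through the page cache, but only after taking lock_sem shared
      * and checking that the target region is not covered by a conflicting
      * byte-range lock.
      */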
2505 static ssize_t
2506 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2507             unsigned long nr_segs, loff_t pos)
2508 {
2509         struct file *file = iocb->ki_filp;
2510         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2511         struct inode *inode = file->f_mapping->host;
2512         struct cifsInodeInfo *cinode = CIFS_I(inode);
2513         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2514         ssize_t rc = -EACCES;
2515
2516         BUG_ON(iocb->ki_pos != pos);
2517
2518         /*
2519          * We need to hold the sem to be sure nobody modifies lock list
2520          * with a brlock that prevents writing.
2521          */
2522         down_read(&cinode->lock_sem);
2523         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2524                                      server->vals->exclusive_lock_type, NULL,
2525                                      CIFS_WRITE_OP)) {
2526                 mutex_lock(&inode->i_mutex);
2527                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2528                                                &iocb->ki_pos);
2529                 mutex_unlock(&inode->i_mutex);
2530         }
2531
2532         if (rc > 0 || rc == -EIOCBQUEUED) {
2533                 ssize_t err;
2534
2535                 err = generic_write_sync(file, pos, rc);
2536                 if (err < 0 && rc > 0)
2537                         rc = err;
2538         }
2539
2540         up_read(&cinode->lock_sem);
2541         return rc;
2542 }
2543
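     /*
      * Strict cache mode write dispatch: with an exclusive oplock the page
      * cache can be trusted (POSIX-capable mounts take the fully generic
      * path, others still check for brlock conflicts); without one, write
      * uncached, straight to the server.
      */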
2544 ssize_t
2545 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2546                    unsigned long nr_segs, loff_t pos)
2547 {
2548         struct inode *inode = file_inode(iocb->ki_filp);
2549         struct cifsInodeInfo *cinode = CIFS_I(inode);
2550         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2551         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2552                                                 iocb->ki_filp->private_data;
2553         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2554         ssize_t written;
2555
2556         if (cinode->clientCanCacheAll) {
2557                 if (cap_unix(tcon->ses) &&
2558                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2559                     && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2560                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2561                 return cifs_writev(iocb, iov, nr_segs, pos);
2562         }
2563         /*
2564          * For non-oplocked files in strict cache mode we need to write the data
2565          * to the server exactly from pos to pos+len-1 rather than flush all
2566          * affected pages, because that may cause an error with mandatory locks
2567          * on these pages but not on the region from pos to pos+len-1.
2568          */
2569         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2570         if (written > 0 && cinode->clientCanCacheRead) {
2571                 /*
2572                  * A Windows 7 server can delay breaking a level2 oplock when a
2573                  * write request comes in - break it on the client to prevent
2574                  * reading stale data.
2575                  */
2576                 cifs_invalidate_mapping(inode);
2577                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2578                          inode);
2579                 cinode->clientCanCacheRead = false;
2580         }
2581         return written;
2582 }
2583
2584 static struct cifs_readdata *
2585 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2586 {
2587         struct cifs_readdata *rdata;
2588
2589         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2590                         GFP_KERNEL);
2591         if (rdata != NULL) {
2592                 kref_init(&rdata->refcount);
2593                 INIT_LIST_HEAD(&rdata->list);
2594                 init_completion(&rdata->done);
2595                 INIT_WORK(&rdata->work, complete);
2596         }
2597
2598         return rdata;
2599 }
2600
2601 void
2602 cifs_readdata_release(struct kref *refcount)
2603 {
2604         struct cifs_readdata *rdata = container_of(refcount,
2605                                         struct cifs_readdata, refcount);
2606
2607         if (rdata->cfile)
2608                 cifsFileInfo_put(rdata->cfile);
2609
2610         kfree(rdata);
2611 }
2612
2613 static int
2614 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2615 {
2616         int rc = 0;
2617         struct page *page;
2618         unsigned int i;
2619
2620         for (i = 0; i < nr_pages; i++) {
2621                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2622                 if (!page) {
2623                         rc = -ENOMEM;
2624                         break;
2625                 }
2626                 rdata->pages[i] = page;
2627         }
2628
2629         if (rc) {               /* put only the pages actually allocated */
2630                 for (i = 0; i < nr_pages && rdata->pages[i]; i++) {
2631                         put_page(rdata->pages[i]);
2632                         rdata->pages[i] = NULL;
2633                 }
2634         }
2635         return rc;
2636 }
2637
2638 static void
2639 cifs_uncached_readdata_release(struct kref *refcount)
2640 {
2641         struct cifs_readdata *rdata = container_of(refcount,
2642                                         struct cifs_readdata, refcount);
2643         unsigned int i;
2644
2645         for (i = 0; i < rdata->nr_pages; i++) {
2646                 put_page(rdata->pages[i]);
2647                 rdata->pages[i] = NULL;
2648         }
2649         cifs_readdata_release(refcount);
2650 }
2651
2652 static int
2653 cifs_retry_async_readv(struct cifs_readdata *rdata)
2654 {
2655         int rc;
2656         struct TCP_Server_Info *server;
2657
2658         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2659
2660         do {
2661                 if (rdata->cfile->invalidHandle) {
2662                         rc = cifs_reopen_file(rdata->cfile, true);
2663                         if (rc != 0)
2664                                 continue;
2665                 }
2666                 rc = server->ops->async_readv(rdata);
2667         } while (rc == -EAGAIN);
2668
2669         return rc;
2670 }
2671
2672 /**
2673  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2674  * @rdata:      the readdata response with list of pages holding data
2675  * @iov:        vector in which we should copy the data
2676  * @nr_segs:    number of segments in vector
2677  * @offset:     offset into file of the first iovec
2678  * @copied:     used to return the amount of data copied to the iov
2679  *
2680  * This function copies data from a list of pages in a readdata response into
2681  * an array of iovecs. It will first calculate where the data should go
2682  * based on the info in the readdata and then copy the data into that spot.
2683  */
2684 static ssize_t
2685 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2686                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2687 {
2688         int rc = 0;
2689         struct iov_iter ii;
2690         size_t pos = rdata->offset - offset;
2691         ssize_t remaining = rdata->bytes;
2692         unsigned char *pdata;
2693         unsigned int i;
2694
2695         /* set up iov_iter and advance to the correct offset */
2696         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2697         iov_iter_advance(&ii, pos);
2698
2699         *copied = 0;
2700         for (i = 0; i < rdata->nr_pages; i++) {
2701                 ssize_t copy;
2702                 struct page *page = rdata->pages[i];
2703
2704                 /* copy a whole page or whatever's left */
2705                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2706
2707                 /* ...but limit it to whatever space is left in the iov */
2708                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2709
2710                 /* go while there's data to be copied and no errors */
2711                 if (copy && !rc) {
2712                         pdata = kmap(page);
2713                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2714                                                 (int)copy);
2715                         kunmap(page);
2716                         if (!rc) {
2717                                 *copied += copy;
2718                                 remaining -= copy;
2719                                 iov_iter_advance(&ii, copy);
2720                         }
2721                 }
2722         }
2723
2724         return rc;
2725 }
2726
2727 static void
2728 cifs_uncached_readv_complete(struct work_struct *work)
2729 {
2730         struct cifs_readdata *rdata = container_of(work,
2731                                                 struct cifs_readdata, work);
2732
2733         complete(&rdata->done);
2734         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2735 }
2736
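     /*
      * Read len bytes of the response from the socket directly into the
      * pages of rdata, zeroing the tail of a partially filled page and
      * releasing any pages that are not needed for this response.
      */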
2737 static int
2738 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2739                         struct cifs_readdata *rdata, unsigned int len)
2740 {
2741         int total_read = 0, result = 0;
2742         unsigned int i;
2743         unsigned int nr_pages = rdata->nr_pages;
2744         struct kvec iov;
2745
2746         rdata->tailsz = PAGE_SIZE;
2747         for (i = 0; i < nr_pages; i++) {
2748                 struct page *page = rdata->pages[i];
2749
2750                 if (len >= PAGE_SIZE) {
2751                         /* enough data to fill the page */
2752                         iov.iov_base = kmap(page);
2753                         iov.iov_len = PAGE_SIZE;
2754                         cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2755                                  i, iov.iov_base, iov.iov_len);
2756                         len -= PAGE_SIZE;
2757                 } else if (len > 0) {
2758                         /* enough for partial page, fill and zero the rest */
2759                         iov.iov_base = kmap(page);
2760                         iov.iov_len = len;
2761                         cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2762                                  i, iov.iov_base, iov.iov_len);
2763                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2764                         rdata->tailsz = len;
2765                         len = 0;
2766                 } else {
2767                         /* no need to hold page hostage */
2768                         rdata->pages[i] = NULL;
2769                         rdata->nr_pages--;
2770                         put_page(page);
2771                         continue;
2772                 }
2773
2774                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2775                 kunmap(page);
2776                 if (result < 0)
2777                         break;
2778
2779                 total_read += result;
2780         }
2781
2782         return total_read > 0 ? total_read : result;
2783 }
2784
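     /*
      * Uncached read path: split the request into rsize-sized chunks, send
      * an async read per chunk, then wait for the replies in order of
      * increasing offset and copy the data into the user's iovecs.
      */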
static ssize_t
cifs_iovec_read(struct file *file, const struct iovec *iov,
		 unsigned long nr_segs, loff_t *poffset)
{
	ssize_t rc;
	size_t len, cur_len;
	ssize_t total_read = 0;
	loff_t offset = *poffset;
	unsigned int npages;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *open_file;
	struct cifs_readdata *rdata, *tmp;
	struct list_head rdata_list;
	pid_t pid;

	if (!nr_segs)
		return 0;

	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	INIT_LIST_HEAD(&rdata_list);
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	do {
		cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);

		/* allocate a readdata struct */
		rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
		if (!rdata) {
			/* nothing to clean up yet; don't kref_put a NULL rdata */
			rc = -ENOMEM;
			break;
		}

		rc = cifs_read_allocate_pages(rdata, npages);
		if (rc)
			goto error;

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;

		rc = cifs_retry_async_readv(rdata);
error:
		if (rc) {
			kref_put(&rdata->refcount,
				 cifs_uncached_readdata_release);
			break;
		}

		list_add_tail(&rdata->list, &rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	/* if at least one read request was sent successfully, reset rc */
	if (!list_empty(&rdata_list))
		rc = 0;

	/* the loop below should proceed in the order of increasing offsets */
restart_loop:
	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
		if (!rc) {
			ssize_t copied;

			/* FIXME: freezable sleep too? */
			rc = wait_for_completion_killable(&rdata->done);
			if (rc)
				rc = -EINTR;
			else if (rdata->result)
				rc = rdata->result;
			else {
				rc = cifs_readdata_to_iov(rdata, iov,
							nr_segs, *poffset,
							&copied);
				total_read += copied;
			}

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				rc = cifs_retry_async_readv(rdata);
				goto restart_loop;
			}
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	cifs_stats_bytes_read(tcon, total_read);
	*poffset += total_read;

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	return total_read ? total_read : rc;
}

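/*
 * Entry point for uncached reads: perform the iovec read and, on success,
 * advance the file position in the kiocb.
 */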
ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
			       unsigned long nr_segs, loff_t pos)
{
	ssize_t read;

	read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
	if (read > 0)
		iocb->ki_pos = pos;

	return read;
}

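/*
 * Read entry point for strict cache mode: use the cached path only when we
 * hold a read oplock and no mandatory brlock conflicts with the request;
 * otherwise read from the server.
 */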
ssize_t
cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
		  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server every time if
	 * we don't have a level II oplock, because the server can delay the
	 * mtime change - so we can't decide whether to invalidate the inode.
	 * Reading pages can also fail if there are mandatory locks on pages
	 * affected by this read but not on the region from pos to pos+len-1.
	 */
	if (!cinode->clientCanCacheRead)
		return cifs_user_readv(iocb, iov, nr_segs, pos);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_aio_read(iocb, iov, nr_segs, pos);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
	up_read(&cinode->lock_sem);
	return rc;
}

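/*
 * Synchronous read: pull read_size bytes from the server in rsize-sized
 * chunks, reopening the file handle and retrying when the server returns
 * -EAGAIN.
 */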
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		current_read_size = min_t(uint, read_size - total_read, rsize);
		/*
		 * For Windows ME and 9x we do not want to request more than
		 * the server negotiated, since it will refuse the read then.
		 */
		if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
			current_read_size = min_t(uint, current_read_size,
					CIFSMaxBufSize);
		}
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, open_file, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		}
		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}

/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	lock_page(page);
	return VM_FAULT_LOCKED;
}

static struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = cifs_page_mkwrite,
	.remap_pages = generic_file_remap_pages,
};

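/*
 * mmap for strict cache mode: if we do not hold a read oplock, the page
 * cache may be stale, so invalidate the mapping before wiring up
 * cifs_file_vm_ops.
 */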
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;
	struct inode *inode = file_inode(file);

	xid = get_xid();

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc) {
			/* don't leak the xid on the early return */
			free_xid(xid);
			return rc;
		}
	}

	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}

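/* mmap for the non-strict case: revalidate the file before mapping it */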
int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();
	rc = cifs_revalidate_file(file);
	if (rc) {
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
		free_xid(xid);
		return rc;
	}
	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}

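/*
 * Completion work handler for readahead: put each page back on the LRU,
 * mark it uptodate and send it to fscache if the read succeeded, then
 * unlock and release it.
 */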
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add_file(page);

		if (rdata->result == 0) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0)
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		page_cache_release(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}

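/*
 * Like cifs_uncached_read_into_pages(), but for readahead: additionally,
 * pages beyond the server's EOF are zero-filled and marked uptodate so the
 * VFS does not keep re-requesting them.
 */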
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int total_read = 0, result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->tailsz = PAGE_CACHE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_CACHE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_CACHE_SIZE;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			len -= PAGE_CACHE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len,
				'\0', PAGE_CACHE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_CACHE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			page_cache_release(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		total_read += result;
	}

	return total_read > 0 ? total_read : result;
}

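/*
 * ->readpages() handler: after trying fscache, batch contiguous pages from
 * the VFS list into rsize-sized async read requests.
 */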
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	unsigned int rsize = cifs_sb->rsize;
	pid_t pid;

	/*
	 * Give up immediately if rsize is too small to read an entire page.
	 * The VFS will fall back to readpage. We should never reach this
	 * point however since we set ra_pages to 0 when the rsize is smaller
	 * than a cache page.
	 */
	if (unlikely(rsize < PAGE_CACHE_SIZE))
		return 0;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	INIT_LIST_HEAD(&tmplist);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i;
		unsigned int bytes = PAGE_CACHE_SIZE;
		unsigned int expected_index;
		unsigned int nr_pages = 1;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;

		page = list_entry(page_list->prev, struct page, lru);

		/*
		 * Lock the page and put it in the cache. Since no one else
		 * should have access to this page, we're safe to simply set
		 * PG_locked without checking it first.
		 */
		__set_page_locked(page);
		rc = add_to_page_cache_locked(page, mapping,
					      page->index, GFP_KERNEL);

		/* give up if we can't stick it in the cache */
		if (rc) {
			__clear_page_locked(page);
			break;
		}

		/* move first page to the tmplist */
		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
		list_move_tail(&page->lru, &tmplist);

		/* now try and add more pages onto the request */
		expected_index = page->index + 1;
		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
			/* discontinuity? */
			if (page->index != expected_index)
				break;

			/* would this page push the read over the rsize? */
			if (bytes + PAGE_CACHE_SIZE > rsize)
				break;

			__set_page_locked(page);
			if (add_to_page_cache_locked(page, mapping,
						page->index, GFP_KERNEL)) {
				__clear_page_locked(page);
				break;
			}
			list_move_tail(&page->lru, &tmplist);
			bytes += PAGE_CACHE_SIZE;
			expected_index++;
			nr_pages++;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			rc = -ENOMEM;
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_CACHE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = cifs_retry_async_readv(rdata);
		if (rc != 0) {
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	return rc;
}

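/*
 * Read one page at *poffset: try fscache first, then fall back to a
 * synchronous read from the server, zeroing the remainder of the page on a
 * short read.
 */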
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	page_cache_get(page);
	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	file_inode(file)->i_atime =
		current_fs_time(file_inode(file)->i_sb);

	if (PAGE_CACHE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	page_cache_release(page);

read_complete:
	return rc;
}

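/* ->readpage() handler: validate the file handle and read a single page */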
static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	unlock_page(page);

	free_xid(xid);
	return rc;
}

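/* return 1 if any open file instance on this inode was opened for write */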
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_file_list_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_file_list_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_file_list_lock);
	return 0;
}

/*
 * We do not want to update the file size from the server for inodes open
 * for write - to avoid races with writepage extending the file. In the
 * future we could consider allowing refreshing the inode only on increases
 * in the file size, but this is tricky to do without racing with
 * writebehind page caching in the current Linux kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since no page cache to corrupt on directio
			   we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

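/*
 * ->write_begin() handler: grab and prepare the page cache page for the
 * write, reading in its current contents only when they will actually be
 * needed.
 */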
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_CACHE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_I(mapping->host)->clientCanCacheRead) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_CACHE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
	} else {
		/*
		 * We could try using another file handle if there is one -
		 * but how would we lock it to prevent a close of that handle
		 * racing with this read? In any case this will be written
		 * out by write_end, so it is fine.
		 */
	}
out:
	*pagep = page;
	return rc;
}

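/*
 * ->releasepage() handler: refuse if the page has private data, otherwise
 * let fscache decide whether the page can be released.
 */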
static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned long offset)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

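/*
 * ->launder_page() handler: synchronously write back a dirty page before
 * it is released, and invalidate any fscache copy.
 */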
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}

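/*
 * Work handler for an oplock break from the server: flush dirty data (and
 * invalidate the cache if the read oplock was lost), push cached byte-range
 * locks to the server, then acknowledge the break.
 */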
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->clientCanCacheRead = false;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (cinode->clientCanCacheRead)
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (cinode->clientCanCacheRead == 0) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_invalidate_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * Releasing a stale oplock after a recent reconnect of the SMB
	 * session using a now incorrect file handle is not a data integrity
	 * issue, but do not bother sending an oplock release if the session
	 * to the server is still disconnected, since the oplock has already
	 * been released by the server in that case.
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
}

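/*
 * Default address space operations, used when the server supports a buffer
 * large enough for the header plus one complete page of data (see
 * cifs_addr_ops_smallbuf below for the alternative).
 */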
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};