fs/xfs/quota/xfs_dquot.c
1 /*
2  * Copyright (c) 2000-2003 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_btree.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_acl.h"
46 #include "xfs_cap.h"
47 #include "xfs_mac.h"
48 #include "xfs_attr.h"
49 #include "xfs_buf_item.h"
50 #include "xfs_trans_space.h"
51 #include "xfs_trans_priv.h"
52 #include "xfs_qm.h"
53
54
55 /*
56    LOCK ORDER
57
58    inode lock               (ilock)
59    dquot hash-chain lock    (hashlock)
60    xqm dquot freelist lock  (freelistlock)
61    mount's dquot list lock  (mplistlock)
62    user dquot lock - lock ordering among dquots is based on the uid or gid
63    group dquot lock - similar to udquots. Between the two dquots, the udquot
64                       has to be locked first.
65    pin lock - the dquot lock must be held to take this lock.
66    flush lock - ditto.
67 */
68
69 STATIC void             xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
70
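/*
 * Error injection knobs (DEBUG builds only): when xfs_do_dqerror is set,
 * xfs_qm_dqget() fails every xfs_dqerror_mod'th request against
 * xfs_dqerror_target with EIO.
 */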
71 #ifdef DEBUG
72 xfs_buftarg_t *xfs_dqerror_target;
73 int xfs_do_dqerror;
74 int xfs_dqreq_num;
75 int xfs_dqerror_mod = 33;
76 #endif
77
78 /*
79  * Allocate and initialize a dquot. We don't always allocate fresh memory;
80  * we try to reclaim a free dquot if the number of incore dquots is above
81  * a threshold.
82  * The only field inside the core that gets initialized at this point
83  * is the d_id field. The idea is to fill in the entire q_core
84  * when we read in the on disk dquot.
85  */
86 STATIC xfs_dquot_t *
87 xfs_qm_dqinit(
88         xfs_mount_t  *mp,
89         xfs_dqid_t   id,
90         uint         type)
91 {
92         xfs_dquot_t     *dqp;
93         boolean_t       brandnewdquot;
94
95         brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
96         dqp->dq_flags = type;
97         dqp->q_core.d_id = cpu_to_be32(id);
98         dqp->q_mount = mp;
99
100         /*
101          * No need to re-initialize these if this is a reclaimed dquot.
102          */
103         if (brandnewdquot) {
104                 dqp->dq_flnext = dqp->dq_flprev = dqp;
105                 mutex_init(&dqp->q_qlock);
106                 initnsema(&dqp->q_flock, 1, "fdq");
107                 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
108
109 #ifdef XFS_DQUOT_TRACE
110                 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP);
111                 xfs_dqtrace_entry(dqp, "DQINIT");
112 #endif
113         } else {
114                 /*
115                  * Only the q_core portion was zeroed in dqreclaim_one().
116                  * So, we need to reset the other fields.
117                  */
118                  dqp->q_nrefs = 0;
119                  dqp->q_blkno = 0;
120                  dqp->MPL_NEXT = dqp->HL_NEXT = NULL;
121                  dqp->HL_PREVP = dqp->MPL_PREVP = NULL;
122                  dqp->q_bufoffset = 0;
123                  dqp->q_fileoffset = 0;
124                  dqp->q_transp = NULL;
125                  dqp->q_gdquot = NULL;
126                  dqp->q_res_bcount = 0;
127                  dqp->q_res_icount = 0;
128                  dqp->q_res_rtbcount = 0;
129                  dqp->q_pincount = 0;
130                  dqp->q_hash = NULL;
131                  ASSERT(dqp->dq_flnext == dqp->dq_flprev);
132
133 #ifdef XFS_DQUOT_TRACE
134                  ASSERT(dqp->q_trace);
135                  xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
136 #endif
137          }
138
139         /*
140          * log item gets initialized later
141          */
142         return (dqp);
143 }
144
145 /*
146  * This is called to free all the memory associated with a dquot
147  */
148 void
149 xfs_qm_dqdestroy(
150         xfs_dquot_t     *dqp)
151 {
152         ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
153
154         mutex_destroy(&dqp->q_qlock);
155         freesema(&dqp->q_flock);
156         sv_destroy(&dqp->q_pinwait);
157
158 #ifdef XFS_DQUOT_TRACE
159         if (dqp->q_trace)
160              ktrace_free(dqp->q_trace);
161         dqp->q_trace = NULL;
162 #endif
163         kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
164         atomic_dec(&xfs_Gqm->qm_totaldquots);
165 }
166
167 /*
168  * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
169  */
170 STATIC void
171 xfs_qm_dqinit_core(
172         xfs_dqid_t      id,
173         uint            type,
174         xfs_dqblk_t     *d)
175 {
176         /*
177          * Caller has zero'd the entire dquot 'chunk' already.
178          * Caller has zeroed the entire dquot 'chunk' already.
179         d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
180         d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
181         d->dd_diskdq.d_id = cpu_to_be32(id);
182         d->dd_diskdq.d_flags = type;
183 }
184
185
186 #ifdef XFS_DQUOT_TRACE
187 /*
188  * Dquot tracing for debugging.
189  */
190 /* ARGSUSED */
191 void
192 __xfs_dqtrace_entry(
193         xfs_dquot_t     *dqp,
194         char            *func,
195         void            *retaddr,
196         xfs_inode_t     *ip)
197 {
198         xfs_dquot_t     *udqp = NULL;
199         xfs_ino_t       ino = 0;
200
201         ASSERT(dqp->q_trace);
202         if (ip) {
203                 ino = ip->i_ino;
204                 udqp = ip->i_udquot;
205         }
206         ktrace_enter(dqp->q_trace,
207                      (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
208                      (void *)func,
209                      (void *)(__psint_t)dqp->q_nrefs,
210                      (void *)(__psint_t)dqp->dq_flags,
211                      (void *)(__psint_t)dqp->q_res_bcount,
212                      (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount),
213                      (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount),
214                      (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit),
215                      (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit),
216                      (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit),
217                      (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit),
218                      (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id),
219                      (void *)(__psint_t)current_pid(),
220                      (void *)(__psint_t)ino,
221                      (void *)(__psint_t)retaddr,
222                      (void *)(__psint_t)udqp);
223         return;
224 }
225 #endif
226
227
228 /*
229  * If default limits are in force, push them into the dquot now.
230  * We overwrite the dquot limits only if they are zero and this
231  * is not the root dquot.
232  */
233 void
234 xfs_qm_adjust_dqlimits(
235         xfs_mount_t             *mp,
236         xfs_disk_dquot_t        *d)
237 {
238         xfs_quotainfo_t         *q = mp->m_quotainfo;
239
240         ASSERT(d->d_id);
241
242         if (q->qi_bsoftlimit && !d->d_blk_softlimit)
243                 d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
244         if (q->qi_bhardlimit && !d->d_blk_hardlimit)
245                 d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
246         if (q->qi_isoftlimit && !d->d_ino_softlimit)
247                 d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
248         if (q->qi_ihardlimit && !d->d_ino_hardlimit)
249                 d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
250         if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
251                 d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
252         if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
253                 d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
254 }
255
256 /*
257  * Check the limits and timers of a dquot and start or reset timers
258  * if necessary.
259  * This gets called even when quota enforcement is OFF, which makes our
260  * life a little less complicated. (We just don't reject any quota
261  * reservations in that case.)
262  * We also return 0 for the timer values in Q_GETQUOTA calls when
263  * enforcement is off.
264  * In contrast, warnings are a little different in that they don't
265  * 'automatically' get started when limits get exceeded.  They do
266  * get reset to zero, however, when we find the count to be under
267  * the soft limit (they are only ever set non-zero via userspace).
268  */
269 void
270 xfs_qm_adjust_dqtimers(
271         xfs_mount_t             *mp,
272         xfs_disk_dquot_t        *d)
273 {
274         ASSERT(d->d_id);
275
276 #ifdef QUOTADEBUG
277         if (d->d_blk_hardlimit)
278                 ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
279                        be64_to_cpu(d->d_blk_hardlimit));
280         if (d->d_ino_hardlimit)
281                 ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
282                        be64_to_cpu(d->d_ino_hardlimit));
283         if (d->d_rtb_hardlimit)
284                 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
285                        be64_to_cpu(d->d_rtb_hardlimit));
286 #endif
287         if (!d->d_btimer) {
288                 if ((d->d_blk_softlimit &&
289                      (be64_to_cpu(d->d_bcount) >=
290                       be64_to_cpu(d->d_blk_softlimit))) ||
291                     (d->d_blk_hardlimit &&
292                      (be64_to_cpu(d->d_bcount) >=
293                       be64_to_cpu(d->d_blk_hardlimit)))) {
294                         d->d_btimer = cpu_to_be32(get_seconds() +
295                                         XFS_QI_BTIMELIMIT(mp));
296                 } else {
297                         d->d_bwarns = 0;
298                 }
299         } else {
300                 if ((!d->d_blk_softlimit ||
301                      (be64_to_cpu(d->d_bcount) <
302                       be64_to_cpu(d->d_blk_softlimit))) &&
303                     (!d->d_blk_hardlimit ||
304                     (be64_to_cpu(d->d_bcount) <
305                      be64_to_cpu(d->d_blk_hardlimit)))) {
306                         d->d_btimer = 0;
307                 }
308         }
309
310         if (!d->d_itimer) {
311                 if ((d->d_ino_softlimit &&
312                      (be64_to_cpu(d->d_icount) >=
313                       be64_to_cpu(d->d_ino_softlimit))) ||
314                     (d->d_ino_hardlimit &&
315                      (be64_to_cpu(d->d_icount) >=
316                       be64_to_cpu(d->d_ino_hardlimit)))) {
317                         d->d_itimer = cpu_to_be32(get_seconds() +
318                                         XFS_QI_ITIMELIMIT(mp));
319                 } else {
320                         d->d_iwarns = 0;
321                 }
322         } else {
323                 if ((!d->d_ino_softlimit ||
324                      (be64_to_cpu(d->d_icount) <
325                       be64_to_cpu(d->d_ino_softlimit)))  &&
326                     (!d->d_ino_hardlimit ||
327                      (be64_to_cpu(d->d_icount) <
328                       be64_to_cpu(d->d_ino_hardlimit)))) {
329                         d->d_itimer = 0;
330                 }
331         }
332
333         if (!d->d_rtbtimer) {
334                 if ((d->d_rtb_softlimit &&
335                      (be64_to_cpu(d->d_rtbcount) >=
336                       be64_to_cpu(d->d_rtb_softlimit))) ||
337                     (d->d_rtb_hardlimit &&
338                      (be64_to_cpu(d->d_rtbcount) >=
339                       be64_to_cpu(d->d_rtb_hardlimit)))) {
340                         d->d_rtbtimer = cpu_to_be32(get_seconds() +
341                                         XFS_QI_RTBTIMELIMIT(mp));
342                 } else {
343                         d->d_rtbwarns = 0;
344                 }
345         } else {
346                 if ((!d->d_rtb_softlimit ||
347                      (be64_to_cpu(d->d_rtbcount) <
348                       be64_to_cpu(d->d_rtb_softlimit))) &&
349                     (!d->d_rtb_hardlimit ||
350                      (be64_to_cpu(d->d_rtbcount) <
351                       be64_to_cpu(d->d_rtb_hardlimit)))) {
352                         d->d_rtbtimer = 0;
353                 }
354         }
355 }
356
357 /*
358  * initialize a buffer full of dquots and log the whole thing
359  */
360 STATIC void
361 xfs_qm_init_dquot_blk(
362         xfs_trans_t     *tp,
363         xfs_mount_t     *mp,
364         xfs_dqid_t      id,
365         uint            type,
366         xfs_buf_t       *bp)
367 {
368         xfs_dqblk_t     *d;
369         int             curid, i;
370
371         ASSERT(tp);
372         ASSERT(XFS_BUF_ISBUSY(bp));
373         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
374
375         d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
376
377         /*
378          * ID of the first dquot in the block - ids are zero-based.
379          */
380         curid = id - (id % XFS_QM_DQPERBLK(mp));
381         ASSERT(curid >= 0);
382         memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
383         for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
384                 xfs_qm_dqinit_core(curid, type, d);
385         xfs_trans_dquot_buf(tp, bp,
386                             (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
387                             ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
388                              XFS_BLI_GDQUOT_BUF)));
389         xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
390 }
391
392
393
394 /*
395  * Allocate a block and fill it with dquots.
396  * This is called when the bmapi finds a hole.
397  */
398 STATIC int
399 xfs_qm_dqalloc(
400         xfs_trans_t     **tpp,
401         xfs_mount_t     *mp,
402         xfs_dquot_t     *dqp,
403         xfs_inode_t     *quotip,
404         xfs_fileoff_t   offset_fsb,
405         xfs_buf_t       **O_bpp)
406 {
407         xfs_fsblock_t   firstblock;
408         xfs_bmap_free_t flist;
409         xfs_bmbt_irec_t map;
410         int             nmaps, error, committed;
411         xfs_buf_t       *bp;
412         xfs_trans_t     *tp = *tpp;
413
414         ASSERT(tp != NULL);
415         xfs_dqtrace_entry(dqp, "DQALLOC");
416
417         /*
418          * Initialize the bmap freelist prior to calling bmapi code.
419          */
420         XFS_BMAP_INIT(&flist, &firstblock);
421         xfs_ilock(quotip, XFS_ILOCK_EXCL);
422         /*
423          * Return if this type of quota was turned off while we weren't
424          * holding the inode lock.
425          */
426         if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
427                 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
428                 return (ESRCH);
429         }
430
431         /*
432          * xfs_trans_commit normally decrements the vnode ref count
433          * when it unlocks the inode. Since we want to keep the quota
434          * inode around, we bump the vnode ref count now.
435          */
436         VN_HOLD(XFS_ITOV(quotip));
437
438         xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
439         nmaps = 1;
440         if ((error = xfs_bmapi(tp, quotip,
441                               offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
442                               XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
443                               &firstblock,
444                               XFS_QM_DQALLOC_SPACE_RES(mp),
445                               &map, &nmaps, &flist, NULL))) {
446                 goto error0;
447         }
448         ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
449         ASSERT(nmaps == 1);
450         ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
451                (map.br_startblock != HOLESTARTBLOCK));
452
453         /*
454          * Keep track of the blkno to save a lookup later
455          */
456         dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
457
458         /* now we can just get the buffer (there's nothing to read yet) */
459         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
460                                dqp->q_blkno,
461                                XFS_QI_DQCHUNKLEN(mp),
462                                0);
463         if (!bp || (error = XFS_BUF_GETERROR(bp)))
464                 goto error1;
465         /*
466          * Make a chunk of dquots out of this buffer and log
467          * the entire thing.
468          */
469         xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
470                               dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
471
472         /*
473          * xfs_bmap_finish() may commit the current transaction and
474          * start a second transaction if the freelist is not empty.
475          *
476          * Since we still want to modify this buffer, we need to
477          * ensure that the buffer is not released on commit of
478          * the first transaction and ensure the buffer is added to the
479          * second transaction.
480          *
481          * If there is only one transaction then don't stop the buffer
482          * from being released when it commits later on.
483          */
484
485         xfs_trans_bhold(tp, bp);
486
487         if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) {
488                 goto error1;
489         }
490
491         if (committed) {
492                 tp = *tpp;
493                 xfs_trans_bjoin(tp, bp);
494         } else {
495                 xfs_trans_bhold_release(tp, bp);
496         }
497
498         *O_bpp = bp;
499         return 0;
500
501       error1:
502         xfs_bmap_cancel(&flist);
503       error0:
504         xfs_iunlock(quotip, XFS_ILOCK_EXCL);
505
506         return (error);
507 }
508
509 /*
510  * Maps a dquot to the buffer containing its on-disk version.
511  * This returns a ptr to the buffer containing the on-disk dquot
512  * in the bpp param, and a ptr to the on-disk dquot within that buffer
513  */
514 STATIC int
515 xfs_qm_dqtobp(
516         xfs_trans_t             **tpp,
517         xfs_dquot_t             *dqp,
518         xfs_disk_dquot_t        **O_ddpp,
519         xfs_buf_t               **O_bpp,
520         uint                    flags)
521 {
522         xfs_bmbt_irec_t map;
523         int             nmaps, error;
524         xfs_buf_t       *bp;
525         xfs_inode_t     *quotip;
526         xfs_mount_t     *mp;
527         xfs_disk_dquot_t *ddq;
528         xfs_dqid_t      id;
529         boolean_t       newdquot;
530         xfs_trans_t     *tp = (tpp ? *tpp : NULL);
531
532         mp = dqp->q_mount;
533         id = be32_to_cpu(dqp->q_core.d_id);
534         nmaps = 1;
535         newdquot = B_FALSE;
536
537         /*
538          * If we don't know where the dquot lives, find out.
539          */
540         if (dqp->q_blkno == (xfs_daddr_t) 0) {
541                 /* We use the id as an index */
542                 dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp);
543                 nmaps = 1;
544                 quotip = XFS_DQ_TO_QIP(dqp);
545                 xfs_ilock(quotip, XFS_ILOCK_SHARED);
546                 /*
547                  * Return if this type of quota was turned off while we weren't
548                  * holding the inode lock.
549                  */
550                 if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
551                         xfs_iunlock(quotip, XFS_ILOCK_SHARED);
552                         return (ESRCH);
553                 }
554                 /*
555                  * Find the block map; no allocations yet
556                  */
557                 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
558                                   XFS_DQUOT_CLUSTER_SIZE_FSB,
559                                   XFS_BMAPI_METADATA,
560                                   NULL, 0, &map, &nmaps, NULL, NULL);
561
562                 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
563                 if (error)
564                         return (error);
565                 ASSERT(nmaps == 1);
566                 ASSERT(map.br_blockcount == 1);
567
568                 /*
569                  * offset of dquot in the (fixed sized) dquot chunk.
570                  */
571                 dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) *
572                         sizeof(xfs_dqblk_t);
573                 if (map.br_startblock == HOLESTARTBLOCK) {
574                         /*
575                          * We don't allocate unless we're asked to
576                          */
577                         if (!(flags & XFS_QMOPT_DQALLOC))
578                                 return (ENOENT);
579
580                         ASSERT(tp);
581                         if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
582                                                 dqp->q_fileoffset, &bp)))
583                                 return (error);
584                         tp = *tpp;
585                         newdquot = B_TRUE;
586                 } else {
587                         /*
588                          * store the blkno etc so that we don't have to do the
589                          * mapping all the time
590                          */
591                         dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
592                 }
593         }
594         ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
595         ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
596
597         /*
598          * Read in the buffer, unless we've just done the allocation
599          * (in which case we already have the buf).
600          */
601         if (! newdquot) {
602                 xfs_dqtrace_entry(dqp, "DQTOBP READBUF");
603                 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
604                                                dqp->q_blkno,
605                                                XFS_QI_DQCHUNKLEN(mp),
606                                                0, &bp))) {
607                         return (error);
608                 }
609                 if (error || !bp)
610                         return XFS_ERROR(error);
611         }
612         ASSERT(XFS_BUF_ISBUSY(bp));
613         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
614
615         /*
616          * calculate the location of the dquot inside the buffer.
617          */
618         ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset);
619
620         /*
621          * A simple sanity check in case we got a corrupted dquot...
622          */
623         if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
624                            flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
625                            "dqtobp")) {
626                 if (!(flags & XFS_QMOPT_DQREPAIR)) {
627                         xfs_trans_brelse(tp, bp);
628                         return XFS_ERROR(EIO);
629                 }
630                 XFS_BUF_BUSY(bp); /* We dirtied this */
631         }
632
633         *O_bpp = bp;
634         *O_ddpp = ddq;
635
636         return (0);
637 }
638
639
640 /*
641  * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
642  * and release the buffer immediately.
643  *
644  */
645 /* ARGSUSED */
646 STATIC int
647 xfs_qm_dqread(
648         xfs_trans_t     **tpp,
649         xfs_dqid_t      id,
650         xfs_dquot_t     *dqp,   /* dquot to get filled in */
651         uint            flags)
652 {
653         xfs_disk_dquot_t *ddqp;
654         xfs_buf_t        *bp;
655         int              error;
656         xfs_trans_t      *tp;
657
658         ASSERT(tpp);
659
660         /*
661          * get a pointer to the on-disk dquot and the buffer containing it
662          * dqp already knows its own type (GROUP/USER).
663          */
664         xfs_dqtrace_entry(dqp, "DQREAD");
665         if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
666                 return (error);
667         }
668         tp = *tpp;
669
670         /* copy everything from disk dquot to the incore dquot */
671         memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
672         ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
673         xfs_qm_dquot_logitem_init(dqp);
674
675         /*
676          * Reservation counters are defined as reservation plus current usage
677          * to avoid having to add every time.
678          */
679         dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
680         dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
681         dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
682
683         /* Mark the buf so that this will stay incore a little longer */
684         XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
685
686         /*
687          * We got the buffer with xfs_trans_read_buf() (in dqtobp()),
688          * so we need to release it with xfs_trans_brelse().
689          * The strategy here is identical to that of inodes; we lock
690          * the dquot in xfs_qm_dqget() before making it accessible to
691          * others. This is because dquots, like inodes, need a good level of
692          * concurrency, and we don't want to take locks on entire buffers
693          * for dquot accesses.
694          * Note also that the dquot buffer may even be dirty at this point, if
695          * this particular dquot was repaired. We still aren't afraid to
696          * brelse it because we have the changes incore.
697          */
698         ASSERT(XFS_BUF_ISBUSY(bp));
699         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
700         xfs_trans_brelse(tp, bp);
701
702         return (error);
703 }
704
705
706 /*
707  * allocate an incore dquot from the kernel heap,
708  * and fill its core with quota information kept on disk.
709  * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
710  * if it wasn't already allocated.
711  */
712 STATIC int
713 xfs_qm_idtodq(
714         xfs_mount_t     *mp,
715         xfs_dqid_t      id,      /* gid or uid, depending on type */
716         uint            type,    /* UDQUOT or GDQUOT */
717         uint            flags,   /* DQALLOC, DQREPAIR */
718         xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
719 {
720         xfs_dquot_t     *dqp;
721         int             error;
722         xfs_trans_t     *tp;
723         int             cancelflags=0;
724
725         dqp = xfs_qm_dqinit(mp, id, type);
726         tp = NULL;
727         if (flags & XFS_QMOPT_DQALLOC) {
728                 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
729                 if ((error = xfs_trans_reserve(tp,
730                                        XFS_QM_DQALLOC_SPACE_RES(mp),
731                                        XFS_WRITE_LOG_RES(mp) +
732                                               BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 +
733                                               128,
734                                        0,
735                                        XFS_TRANS_PERM_LOG_RES,
736                                        XFS_WRITE_LOG_COUNT))) {
737                         cancelflags = 0;
738                         goto error0;
739                 }
740                 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
741         }
742
743         /*
744          * Read it from disk; xfs_qm_dqread() takes care of
745          * all the necessary initialization of the dquot's fields (locks, etc).
746          */
747         if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
748                 /*
749                  * This can happen if quotas got turned off (ESRCH),
750                  * or if the dquot didn't exist on disk and we ask to
751                  * allocate (ENOENT).
752                  */
753                 xfs_dqtrace_entry(dqp, "DQREAD FAIL");
754                 cancelflags |= XFS_TRANS_ABORT;
755                 goto error0;
756         }
757         if (tp) {
758                 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
759                                              NULL)))
760                         goto error1;
761         }
762
763         *O_dqpp = dqp;
764         return (0);
765
766  error0:
767         ASSERT(error);
768         if (tp)
769                 xfs_trans_cancel(tp, cancelflags);
770  error1:
771         xfs_qm_dqdestroy(dqp);
772         *O_dqpp = NULL;
773         return (error);
774 }
775
776 /*
777  * Look up a dquot in the incore dquot hashtable. We keep two separate
778  * hashtables for user and group dquots; these are global tables
779  * inside the XQM, not per-filesystem tables.
780  * The hash chain must be locked by the caller, and it is left locked
781  * on return. The returned dquot is locked.
782  */
783 STATIC int
784 xfs_qm_dqlookup(
785         xfs_mount_t             *mp,
786         xfs_dqid_t              id,
787         xfs_dqhash_t            *qh,
788         xfs_dquot_t             **O_dqpp)
789 {
790         xfs_dquot_t             *dqp;
791         uint                    flist_locked;
792         xfs_dquot_t             *d;
793
794         ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
795
796         flist_locked = B_FALSE;
797
798         /*
799          * Traverse the hashchain looking for a match
800          */
801         for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) {
802                 /*
803                  * We already have the hashlock. We don't need the
804                  * dqlock to look at the id field of the dquot, since the
805                  * id can't be modified without the hashlock anyway.
806                  */
807                 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
808                         xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP");
809                         /*
810                          * All in core dquots must be on the dqlist of mp
811                          */
812                         ASSERT(dqp->MPL_PREVP != NULL);
813
814                         xfs_dqlock(dqp);
815                         if (dqp->q_nrefs == 0) {
816                                 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
817                                 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
818                                         xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT");
819
820                                         /*
821                                          * We may have raced with dqreclaim_one()
822                                          * (and lost). So, flag that we don't
823                                          * want the dquot to be reclaimed.
824                                          */
825                                         dqp->dq_flags |= XFS_DQ_WANT;
826                                         xfs_dqunlock(dqp);
827                                         xfs_qm_freelist_lock(xfs_Gqm);
828                                         xfs_dqlock(dqp);
829                                         dqp->dq_flags &= ~(XFS_DQ_WANT);
830                                 }
831                                 flist_locked = B_TRUE;
832                         }
833
834                         /*
835                          * id couldn't have changed; we had the hashlock all
836                          * along
837                          */
838                         ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
839
840                         if (flist_locked) {
841                                 if (dqp->q_nrefs != 0) {
842                                         xfs_qm_freelist_unlock(xfs_Gqm);
843                                         flist_locked = B_FALSE;
844                                 } else {
845                                         /*
846                                          * take it off the freelist
847                                          */
848                                         xfs_dqtrace_entry(dqp,
849                                                         "DQLOOKUP: TAKEOFF FL");
850                                         XQM_FREELIST_REMOVE(dqp);
851                                         /* xfs_qm_freelist_print(&(xfs_Gqm->
852                                                         qm_dqfreelist),
853                                                         "after removal"); */
854                                 }
855                         }
856
857                         /*
858                          * grab a reference
859                          */
860                         XFS_DQHOLD(dqp);
861
862                         if (flist_locked)
863                                 xfs_qm_freelist_unlock(xfs_Gqm);
864                         /*
865                          * move the dquot to the front of the hashchain
866                          */
867                         ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
868                         if (dqp->HL_PREVP != &qh->qh_next) {
869                                 xfs_dqtrace_entry(dqp,
870                                                   "DQLOOKUP: HASH MOVETOFRONT");
871                                 if ((d = dqp->HL_NEXT))
872                                         d->HL_PREVP = dqp->HL_PREVP;
873                                 *(dqp->HL_PREVP) = d;
874                                 d = qh->qh_next;
875                                 d->HL_PREVP = &dqp->HL_NEXT;
876                                 dqp->HL_NEXT = d;
877                                 dqp->HL_PREVP = &qh->qh_next;
878                                 qh->qh_next = dqp;
879                         }
880                         xfs_dqtrace_entry(dqp, "LOOKUP END");
881                         *O_dqpp = dqp;
882                         ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
883                         return (0);
884                 }
885         }
886
887         *O_dqpp = NULL;
888         ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
889         return (1);
890 }
891
892 /*
893  * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return
894  * a locked dquot, doing an allocation (if requested) as needed.
895  * When both an inode and an id are given, the inode's id takes precedence.
896  * That is, if the id changes while we don't hold the ilock inside this
897  * function, the new dquot is returned, not necessarily the one requested
898  * in the id argument.
899  */
900 int
901 xfs_qm_dqget(
902         xfs_mount_t     *mp,
903         xfs_inode_t     *ip,      /* locked inode (optional) */
904         xfs_dqid_t      id,       /* uid/projid/gid depending on type */
905         uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
906         uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
907         xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
908 {
909         xfs_dquot_t     *dqp;
910         xfs_dqhash_t    *h;
911         uint            version;
912         int             error;
913
914         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
915         if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
916             (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
917             (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
918                 return (ESRCH);
919         }
920         h = XFS_DQ_HASH(mp, id, type);
921
922 #ifdef DEBUG
923         if (xfs_do_dqerror) {
924                 if ((xfs_dqerror_target == mp->m_ddev_targp) &&
925                     (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
926                         cmn_err(CE_DEBUG, "Returning error in dqget");
927                         return (EIO);
928                 }
929         }
930 #endif
931
932  again:
933
934 #ifdef DEBUG
935         ASSERT(type == XFS_DQ_USER ||
936                type == XFS_DQ_PROJ ||
937                type == XFS_DQ_GROUP);
938         if (ip) {
939                 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
940                 if (type == XFS_DQ_USER)
941                         ASSERT(ip->i_udquot == NULL);
942                 else
943                         ASSERT(ip->i_gdquot == NULL);
944         }
945 #endif
946         XFS_DQ_HASH_LOCK(h);
947
948         /*
949          * Look in the cache (hashtable).
950          * The chain is kept locked during lookup.
951          */
952         if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
953                 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
954                 /*
955                  * The dquot was found, moved to the front of the chain,
956                  * taken off the freelist if it was on it, and locked
957                  * at this point. Just unlock the hashchain and return.
958                  */
959                 ASSERT(*O_dqpp);
960                 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
961                 XFS_DQ_HASH_UNLOCK(h);
962                 xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)");
963                 return (0);     /* success */
964         }
965         XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
966
967         /*
968          * Dquot cache miss. We don't want to keep the inode lock across
969          * a (potential) disk read. Also we don't want to deal with the lock
970          * ordering between quotainode and this inode. OTOH, dropping the inode
971          * lock here means dealing with a chown that can happen before
972          * we re-acquire the lock.
973          */
974         if (ip)
975                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
976         /*
977          * Save the hashchain version stamp, and unlock the chain, so that
978          * we don't keep the lock across a disk read
979          */
980         version = h->qh_version;
981         XFS_DQ_HASH_UNLOCK(h);
982
983         /*
984          * Allocate the dquot on the kernel heap, and read the ondisk
985          * portion off the disk. Also, do all the necessary initialization.
986          * This can return ENOENT if the dquot didn't exist on disk and we didn't
987          * ask it to allocate; ESRCH if quotas got turned off suddenly.
988          */
989         if ((error = xfs_qm_idtodq(mp, id, type,
990                                   flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
991                                            XFS_QMOPT_DOWARN),
992                                   &dqp))) {
993                 if (ip)
994                         xfs_ilock(ip, XFS_ILOCK_EXCL);
995                 return (error);
996         }
997
998         /*
999          * See if this is mount code calling to look at the overall quota limits
1000          * which are stored in the id == 0 user or group's dquot.
1001          * Since we may not have done a quotacheck by this point, just return
1002          * the dquot without attaching it to any hashtables, lists, etc, or even
1003          * taking a reference.
1004          * The caller must dqdestroy this once done.
1005          */
1006         if (flags & XFS_QMOPT_DQSUSER) {
1007                 ASSERT(id == 0);
1008                 ASSERT(! ip);
1009                 goto dqret;
1010         }
1011
1012         /*
1013          * Dquot lock comes after hashlock in the lock ordering
1014          */
1015         if (ip) {
1016                 xfs_ilock(ip, XFS_ILOCK_EXCL);
1017                 if (! XFS_IS_DQTYPE_ON(mp, type)) {
1018                         /* inode stays locked on return */
1019                         xfs_qm_dqdestroy(dqp);
1020                         return XFS_ERROR(ESRCH);
1021                 }
1022                 /*
1023                  * A dquot could be attached to this inode by now, since
1024                  * we had dropped the ilock.
1025                  */
1026                 if (type == XFS_DQ_USER) {
1027                         if (ip->i_udquot) {
1028                                 xfs_qm_dqdestroy(dqp);
1029                                 dqp = ip->i_udquot;
1030                                 xfs_dqlock(dqp);
1031                                 goto dqret;
1032                         }
1033                 } else {
1034                         if (ip->i_gdquot) {
1035                                 xfs_qm_dqdestroy(dqp);
1036                                 dqp = ip->i_gdquot;
1037                                 xfs_dqlock(dqp);
1038                                 goto dqret;
1039                         }
1040                 }
1041         }
1042
1043         /*
1044          * Hashlock comes after ilock in lock order
1045          */
1046         XFS_DQ_HASH_LOCK(h);
1047         if (version != h->qh_version) {
1048                 xfs_dquot_t *tmpdqp;
1049                 /*
1050                  * Now, see if somebody else put the dquot in the
1051                  * hashtable before us. This can happen because we didn't
1052                  * keep the hashchain lock. We don't have to worry about
1053                  * lock order between the two dquots here since dqp isn't
1054                  * on any findable lists yet.
1055                  */
1056                 if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
1057                         /*
1058                          * Duplicate found. Just throw away the new dquot
1059                          * and start over.
1060                          */
1061                         xfs_qm_dqput(tmpdqp);
1062                         XFS_DQ_HASH_UNLOCK(h);
1063                         xfs_qm_dqdestroy(dqp);
1064                         XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
1065                         goto again;
1066                 }
1067         }
1068
1069         /*
1070          * Put the dquot at the beginning of the hash-chain and mp's list
1071          * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
1072          */
1073         ASSERT(XFS_DQ_IS_HASH_LOCKED(h));
1074         dqp->q_hash = h;
1075         XQM_HASHLIST_INSERT(h, dqp);
1076
1077         /*
1078          * Attach this dquot to this filesystem's list of all dquots,
1079          * kept inside the mount structure in m_quotainfo field
1080          */
1081         xfs_qm_mplist_lock(mp);
1082
1083         /*
1084          * We return a locked dquot to the caller, with a reference taken
1085          */
1086         xfs_dqlock(dqp);
1087         dqp->q_nrefs = 1;
1088
1089         XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
1090
1091         xfs_qm_mplist_unlock(mp);
1092         XFS_DQ_HASH_UNLOCK(h);
1093  dqret:
1094         ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip));
1095         xfs_dqtrace_entry(dqp, "DQGET DONE");
1096         *O_dqpp = dqp;
1097         return (0);
1098 }
1099
1100
1101 /*
1102  * Release a reference to the dquot (decrement ref-count)
1103  * and unlock it. If there is a group quota attached to this
1104  * dquot, carefully release that too without tripping over
1105  * deadlocks'n'stuff.
1106  */
1107 void
1108 xfs_qm_dqput(
1109         xfs_dquot_t     *dqp)
1110 {
1111         xfs_dquot_t     *gdqp;
1112
1113         ASSERT(dqp->q_nrefs > 0);
1114         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1115         xfs_dqtrace_entry(dqp, "DQPUT");
1116
1117         if (dqp->q_nrefs != 1) {
1118                 dqp->q_nrefs--;
1119                 xfs_dqunlock(dqp);
1120                 return;
1121         }
1122
1123         /*
1124          * drop the dqlock and acquire the freelist and dqlock
1125          * in the right order; but try to get it out-of-order first
1126          */
1127         if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
1128                 xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT");
1129                 xfs_dqunlock(dqp);
1130                 xfs_qm_freelist_lock(xfs_Gqm);
1131                 xfs_dqlock(dqp);
1132         }
1133
1134         while (1) {
1135                 gdqp = NULL;
1136
1137                 /* We can't depend on nrefs being == 1 here */
1138                 if (--dqp->q_nrefs == 0) {
1139                         xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST");
1140                         /*
1141                          * insert at end of the freelist.
1142                          */
1143                         XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
1144
1145                         /*
1146                          * If we just added a udquot to the freelist, then
1147                          * we want to release the gdquot reference that
1148                          * it (probably) has. Otherwise it'll keep the
1149                          * gdquot from getting reclaimed.
1150                          */
1151                         if ((gdqp = dqp->q_gdquot)) {
1152                                 /*
1153                                  * Avoid a recursive dqput call
1154                                  */
1155                                 xfs_dqlock(gdqp);
1156                                 dqp->q_gdquot = NULL;
1157                         }
1158
1159                         /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
1160                            "@@@@@++ Free list (after append) @@@@@+");
1161                            */
1162                 }
1163                 xfs_dqunlock(dqp);
1164
1165                 /*
1166                  * If we had a group quota inside the user quota as a hint,
1167                  * release it now.
1168                  */
1169                 if (! gdqp)
1170                         break;
1171                 dqp = gdqp;
1172         }
1173         xfs_qm_freelist_unlock(xfs_Gqm);
1174 }
1175
1176 /*
1177  * Release a dquot. Flush it if dirty, then dqput() it.
1178  * dquot must not be locked.
1179  */
1180 void
1181 xfs_qm_dqrele(
1182         xfs_dquot_t     *dqp)
1183 {
1184         ASSERT(dqp);
1185         xfs_dqtrace_entry(dqp, "DQRELE");
1186
1187         xfs_dqlock(dqp);
1188         /*
1189          * We don't bother flushing the dquot here even if it is dirty.
1190          * That will create stutters that we want to avoid.
1191          * Instead we do a delayed write when we try to reclaim
1192          * a dirty dquot. Also xfs_sync will take part of the burden...
1193          */
1194         xfs_qm_dqput(dqp);
1195 }
1196
1197
1198 /*
1199  * Write a modified dquot to disk.
1200  * The dquot must be locked and the flush lock held by the caller.
1201  * The flush lock will not be unlocked until the dquot reaches the disk,
1202  * but the dquot is free to be unlocked and modified by the caller
1203  * in the interim. Dquot is still locked on return. This behavior is
1204  * identical to that of inodes.
1205  */
1206 int
1207 xfs_qm_dqflush(
1208         xfs_dquot_t             *dqp,
1209         uint                    flags)
1210 {
1211         xfs_mount_t             *mp;
1212         xfs_buf_t               *bp;
1213         xfs_disk_dquot_t        *ddqp;
1214         int                     error;
1215         SPLDECL(s);
1216
1217         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1218         ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
1219         xfs_dqtrace_entry(dqp, "DQFLUSH");
1220
1221         /*
1222          * If not dirty, nada.
1223          */
1224         if (!XFS_DQ_IS_DIRTY(dqp)) {
1225                 xfs_dqfunlock(dqp);
1226                 return (0);
1227         }
1228
1229         /*
1230          * Can't flush a pinned dquot. Wait for it.
1231          */
1232         xfs_qm_dqunpin_wait(dqp);
1233
1234         /*
1235          * This may have been unpinned because the filesystem is shutting
1236          * down forcibly. If that's the case we must not write this dquot
1237          * to disk, because the log record didn't make it to disk!
1238          */
1239         if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
1240                 dqp->dq_flags &= ~(XFS_DQ_DIRTY);
1241                 xfs_dqfunlock(dqp);
1242                 return XFS_ERROR(EIO);
1243         }
1244
1245         /*
1246          * Get the buffer containing the on-disk dquot.
1247          * We don't need a transaction envelope because we know that
1248          * the on-disk dquot has already been allocated.
1249          */
1250         if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
1251                 xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
1252                 ASSERT(error != ENOENT);
1253                 /*
1254                  * Quotas could have gotten turned off (ESRCH)
1255                  */
1256                 xfs_dqfunlock(dqp);
1257                 return (error);
1258         }
1259
1260         if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id),
1261                            0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
1262                 xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE);
1263                 return XFS_ERROR(EIO);
1264         }
1265
1266         /* This is the only portion of data that needs to persist */
1267         memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
1268
1269         /*
1270          * Clear the dirty field and remember the flush lsn for later use.
1271          */
1272         dqp->dq_flags &= ~(XFS_DQ_DIRTY);
1273         mp = dqp->q_mount;
1274
1275         /* lsn is 64 bits */
1276         AIL_LOCK(mp, s);
1277         dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
1278         AIL_UNLOCK(mp, s);
1279
1280         /*
1281          * Attach an iodone routine so that we can remove this dquot from the
1282          * AIL and release the flush lock once the dquot is synced to disk.
1283          */
1284         xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *))
1285                               xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item));
1286         /*
1287          * If the buffer is pinned then push on the log so we won't
1288          * get stuck waiting in the write for too long.
1289          */
1290         if (XFS_BUF_ISPINNED(bp)) {
1291                 xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE");
1292                 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
1293         }
1294
1295         if (flags & XFS_QMOPT_DELWRI) {
1296                 xfs_bdwrite(mp, bp);
1297         } else if (flags & XFS_QMOPT_ASYNC) {
1298                 xfs_bawrite(mp, bp);
1299         } else {
1300                 error = xfs_bwrite(mp, bp);
1301         }
1302         xfs_dqtrace_entry(dqp, "DQFLUSH END");
1303         /*
1304          * dqp is still locked, but caller is free to unlock it now.
1305          */
1306         return (error);
1307
1308 }
1309
1310 /*
1311  * This is the dquot flushing I/O completion routine.  It is called
1312  * from interrupt level when the buffer containing the dquot is
1313  * flushed to disk.  It is responsible for removing the dquot logitem
1314  * from the AIL if it has not been re-logged, and unlocking the dquot's
1315  * flush lock. This behavior is very similar to that of inodes..
1316  */
1317 /*ARGSUSED*/
1318 STATIC void
1319 xfs_qm_dqflush_done(
1320         xfs_buf_t               *bp,
1321         xfs_dq_logitem_t        *qip)
1322 {
1323         xfs_dquot_t             *dqp;
1324         SPLDECL(s);
1325
1326         dqp = qip->qli_dquot;
1327
1328         /*
1329          * We only want to pull the item from the AIL if its
1330          * location in the log has not changed since we started the flush.
1331          * Thus, we only bother if the dquot's lsn has
1332          * not changed. First we check the lsn outside the lock
1333          * since it's cheaper, and then we recheck while
1334          * holding the lock before removing the dquot from the AIL.
1335          */
1336         if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
1337             qip->qli_item.li_lsn == qip->qli_flush_lsn) {
1338
1339                 AIL_LOCK(dqp->q_mount, s);
1340                 /*
1341                  * xfs_trans_delete_ail() drops the AIL lock.
1342                  */
1343                 if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
1344                         xfs_trans_delete_ail(dqp->q_mount,
1345                                              (xfs_log_item_t*)qip, s);
1346                 else
1347                         AIL_UNLOCK(dqp->q_mount, s);
1348         }
1349
1350         /*
1351          * Release the dq's flush lock since we're done with it.
1352          */
1353         xfs_dqfunlock(dqp);
1354 }
1355
1356
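/*
 * Try to take the dquot's flush lock without sleeping.  Returns non-zero
 * and marks the dquot XFS_DQ_FLOCKED if the lock was acquired, zero
 * otherwise.
 */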
1357 int
1358 xfs_qm_dqflock_nowait(
1359         xfs_dquot_t *dqp)
1360 {
1361         int locked;
1362
1363         locked = cpsema(&((dqp)->q_flock));
1364
1365         /* XXX ifdef these out */
1366         if (locked)
1367                 (dqp)->dq_flags |= XFS_DQ_FLOCKED;
1368         return (locked);
1369 }
1370
1371
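/*
 * Try to take the dquot's main lock without sleeping.  Returns non-zero
 * if the lock was acquired.
 */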
1372 int
1373 xfs_qm_dqlock_nowait(
1374         xfs_dquot_t *dqp)
1375 {
1376         return (mutex_trylock(&((dqp)->q_qlock)));
1377 }
1378
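/*
 * Lock the dquot, sleeping until the lock is available.
 */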
1379 void
1380 xfs_dqlock(
1381         xfs_dquot_t *dqp)
1382 {
1383         mutex_lock(&(dqp->q_qlock));
1384 }
1385
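/*
 * Unlock the dquot and, if its log item has been initialized, notify the
 * log that the lock was released so anyone waiting on the item in the AIL
 * can proceed.
 */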
1386 void
1387 xfs_dqunlock(
1388         xfs_dquot_t *dqp)
1389 {
1390         mutex_unlock(&(dqp->q_qlock));
1391         if (dqp->q_logitem.qli_dquot == dqp) {
1392                 /* Once was dqp->q_mount, but might just have been cleared */
1393                 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp,
1394                                         (xfs_log_item_t*)&(dqp->q_logitem));
1395         }
1396 }
1397
1398
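/*
 * Unlock the dquot without notifying the log (see xfs_dqunlock()).
 */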
1399 void
1400 xfs_dqunlock_nonotify(
1401         xfs_dquot_t *dqp)
1402 {
1403         mutex_unlock(&(dqp->q_qlock));
1404 }
1405
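/*
 * Lock two dquots (either may be NULL) in a consistent order, based on
 * their ids, so that concurrent callers cannot deadlock against each other.
 */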
1406 void
1407 xfs_dqlock2(
1408         xfs_dquot_t     *d1,
1409         xfs_dquot_t     *d2)
1410 {
1411         if (d1 && d2) {
1412                 ASSERT(d1 != d2);
1413                 if (be32_to_cpu(d1->q_core.d_id) >
1414                     be32_to_cpu(d2->q_core.d_id)) {
1415                         xfs_dqlock(d2);
1416                         xfs_dqlock(d1);
1417                 } else {
1418                         xfs_dqlock(d1);
1419                         xfs_dqlock(d2);
1420                 }
1421         } else {
1422                 if (d1) {
1423                         xfs_dqlock(d1);
1424                 } else if (d2) {
1425                         xfs_dqlock(d2);
1426                 }
1427         }
1428 }
1429
1430
1431 /*
1432  * Take a dquot out of the mount's dqlist as well as the hashlist.
1433  * This is called via unmount as well as quotaoff, and the purge
1434  * will always succeed unless there are soft (temp) references
1435  * outstanding.
1436  *
1437  * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
1438  * that we're returning! XXXsup - not cool.
1439  */
1440 /* ARGSUSED */
1441 int
1442 xfs_qm_dqpurge(
1443         xfs_dquot_t     *dqp,
1444         uint            flags)
1445 {
1446         xfs_dqhash_t    *thishash;
1447         xfs_mount_t     *mp;
1448
1449         mp = dqp->q_mount;
1450
1451         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
1452         ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
1453
1454         xfs_dqlock(dqp);
1455         /*
1456          * We really can't afford to purge a dquot that is
1457          * referenced, because these are hard refs.
1458          * It shouldn't happen in general because we went through _all_ inodes in
1459          * dqrele_all_inodes before calling this and didn't let the mountlock go.
1460          * However it is possible that we have dquots with temporary
1461          * references that are not attached to an inode. e.g. see xfs_setattr().
1462          */
1463         if (dqp->q_nrefs != 0) {
1464                 xfs_dqunlock(dqp);
1465                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
1466                 return (1);
1467         }
1468
1469         ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
1470
1471         /*
1472          * If we're turning off quotas, we have to make sure that, for
1473          * example, we don't delete quota disk blocks while dquots are
1474          * in the process of getting written to those disk blocks.
1475          * This dquot might well be on AIL, and we can't leave it there
1476          * if we're turning off quotas. Basically, we need this flush
1477          * lock, and are willing to block on it.
1478          */
1479         if (! xfs_qm_dqflock_nowait(dqp)) {
1480                 /*
1481                  * Block on the flush lock after nudging dquot buffer,
1482                  * if it is incore.
1483                  */
1484                 xfs_qm_dqflock_pushbuf_wait(dqp);
1485         }
1486
1487         /*
1488          * XXX If we're turning this type of quota off, we don't care
1489          * about the dirty metadata sitting in this dquot. OTOH, if
1490          * we're unmounting, we do care, so we flush it and wait.
1491          */
1492         if (XFS_DQ_IS_DIRTY(dqp)) {
1493                 xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY");
1494                 /* dqflush unlocks dqflock */
1495                 /*
1496                  * Given that dqpurge is a very rare occurrence, it is OK
1497                  * that we're holding the hashlist and mplist locks
1498                  * across the disk write. But, ... XXXsup
1499                  *
1500                  * We don't care about getting disk errors here. We need
1501                  * to purge this dquot anyway, so we go ahead regardless.
1502                  */
1503                 (void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
1504                 xfs_dqflock(dqp);
1505         }
1506         ASSERT(dqp->q_pincount == 0);
1507         ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1508                !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1509
1510         thishash = dqp->q_hash;
1511         XQM_HASHLIST_REMOVE(thishash, dqp);
1512         XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp);
1513         /*
1514          * XXX Move this to the front of the freelist, if we can get the
1515          * freelist lock.
1516          */
1517         ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
1518
1519         dqp->q_mount = NULL;
1520         dqp->q_hash = NULL;
1521         dqp->dq_flags = XFS_DQ_INACTIVE;
1522         memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1523         xfs_dqfunlock(dqp);
1524         xfs_dqunlock(dqp);
1525         XFS_DQ_HASH_UNLOCK(thishash);
1526         return (0);
1527 }
1528
1529
1530 #ifdef QUOTADEBUG
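/*
 * Dump the interesting fields of an incore dquot to the console.
 */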
1531 void
1532 xfs_qm_dqprint(xfs_dquot_t *dqp)
1533 {
1534         cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------");
1535         cmn_err(CE_DEBUG, "---- dquotID =  %d",
1536                 (int)be32_to_cpu(dqp->q_core.d_id));
1537         cmn_err(CE_DEBUG, "---- type    =  %s", DQFLAGTO_TYPESTR(dqp));
1538         cmn_err(CE_DEBUG, "---- fs      =  0x%p", dqp->q_mount);
1539         cmn_err(CE_DEBUG, "---- blkno   =  0x%x", (int) dqp->q_blkno);
1540         cmn_err(CE_DEBUG, "---- boffset =  0x%x", (int) dqp->q_bufoffset);
1541         cmn_err(CE_DEBUG, "---- blkhlimit =  %Lu (0x%x)",
1542                 be64_to_cpu(dqp->q_core.d_blk_hardlimit),
1543                 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
1544         cmn_err(CE_DEBUG, "---- blkslimit =  %Lu (0x%x)",
1545                 be64_to_cpu(dqp->q_core.d_blk_softlimit),
1546                 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
1547         cmn_err(CE_DEBUG, "---- inohlimit =  %Lu (0x%x)",
1548                 be64_to_cpu(dqp->q_core.d_ino_hardlimit),
1549                 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
1550         cmn_err(CE_DEBUG, "---- inoslimit =  %Lu (0x%x)",
1551                 be64_to_cpu(dqp->q_core.d_ino_softlimit),
1552                 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
1553         cmn_err(CE_DEBUG, "---- bcount  =  %Lu (0x%x)",
1554                 be64_to_cpu(dqp->q_core.d_bcount),
1555                 (int)be64_to_cpu(dqp->q_core.d_bcount));
1556         cmn_err(CE_DEBUG, "---- icount  =  %Lu (0x%x)",
1557                 be64_to_cpu(dqp->q_core.d_icount),
1558                 (int)be64_to_cpu(dqp->q_core.d_icount));
1559         cmn_err(CE_DEBUG, "---- btimer  =  %d",
1560                 (int)be32_to_cpu(dqp->q_core.d_btimer));
1561         cmn_err(CE_DEBUG, "---- itimer  =  %d",
1562                 (int)be32_to_cpu(dqp->q_core.d_itimer));
1563         cmn_err(CE_DEBUG, "---------------------------");
1564 }
1565 #endif
1566
1567 /*
1568  * Give the buffer a little push if it is incore and
1569  * wait on the flush lock.
1570  */
1571 void
1572 xfs_qm_dqflock_pushbuf_wait(
1573         xfs_dquot_t     *dqp)
1574 {
1575         xfs_buf_t       *bp;
1576
1577         /*
1578          * Check to see if the dquot has been flushed as a delayed
1579          * write.  If so, grab its buffer and send it
1580          * out immediately.  We'll be able to acquire
1581          * the flush lock when the I/O completes.
1582          */
1583         bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
1584                     XFS_QI_DQCHUNKLEN(dqp->q_mount),
1585                     XFS_INCORE_TRYLOCK);
1586         if (bp != NULL) {
1587                 if (XFS_BUF_ISDELAYWRITE(bp)) {
1588                         if (XFS_BUF_ISPINNED(bp)) {
1589                                 xfs_log_force(dqp->q_mount,
1590                                               (xfs_lsn_t)0,
1591                                               XFS_LOG_FORCE);
1592                         }
1593                         xfs_bawrite(dqp->q_mount, bp);
1594                 } else {
1595                         xfs_buf_relse(bp);
1596                 }
1597         }
1598         xfs_dqflock(dqp);
1599 }