/*
    This file is a part of libcds - Concurrent Data Structures library

    (C) Copyright Maxim Khizhinsky (libcds.dev@gmail.com) 2006-2016

    Source code repo: http://github.com/khizmax/libcds/
    Download: http://sourceforge.net/projects/libcds/files/

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef CDSLIB_COMPILER_GCC_IA64_CXX11_ATOMIC_H
#define CDSLIB_COMPILER_GCC_IA64_CXX11_ATOMIC_H

/*
    Source:
        1. load/store: http://www.decadent.org.uk/pipermail/cpp-threads/2008-December/001932.html
        2. Mapping to C++ Memory Model: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
*/

#include <cstdint>

//@cond
namespace cds { namespace cxx11_atomic {
    namespace platform { CDS_CXX11_INLINE_NAMESPACE namespace gcc { CDS_CXX11_INLINE_NAMESPACE namespace ia64 {

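        // itanium_full_fence: emits the IA-64 "mf" instruction - a full (bidirectional) memory fence.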
        static inline void itanium_full_fence() CDS_NOEXCEPT
        {
            __asm__ __volatile__ ( "mf \n\t" ::: "memory" );
        }

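        // fence_before()/fence_after() emit the barrier required before/after an atomic operation
        // for a given memory_order: nothing for relaxed/consume, a compiler-only barrier for the
        // acquire/release cases handled here, and a full "mf" fence for seq_cst.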
        static inline void fence_before( memory_order order ) CDS_NOEXCEPT
        {
            switch(order) {
            case memory_order_relaxed:
            case memory_order_consume:
            case memory_order_acquire:
                break;
            case memory_order_release:
            case memory_order_acq_rel:
                CDS_COMPILER_RW_BARRIER;
                break;
            case memory_order_seq_cst:
                itanium_full_fence();
                break;
            }
        }

        static inline void fence_after( memory_order order ) CDS_NOEXCEPT
        {
            switch(order) {
            case memory_order_acquire:
            case memory_order_acq_rel:
                CDS_COMPILER_RW_BARRIER;
                break;
            case memory_order_relaxed:
            case memory_order_consume:
            case memory_order_release:
                break;
            case memory_order_seq_cst:
                itanium_full_fence();
                break;
            }
        }


        //-----------------------------------------------------------------------------
        // fences
        //-----------------------------------------------------------------------------
        static inline void thread_fence(memory_order order) CDS_NOEXCEPT
        {
            switch(order)
            {
                case memory_order_relaxed:
                case memory_order_consume:
                    break;
                case memory_order_release:
                case memory_order_acquire:
                case memory_order_acq_rel:
                    CDS_COMPILER_RW_BARRIER;
                    break;
                case memory_order_seq_cst:
                    itanium_full_fence();
                    break;
                default:;
            }
        }

        static inline void signal_fence(memory_order order) CDS_NOEXCEPT
        {
            // C++11, 29.8.8: a signal fence constrains compiler reordering only; no hardware instruction is emitted
            switch(order)
            {
                case memory_order_relaxed:
                    break;
                case memory_order_consume:
                case memory_order_release:
                case memory_order_acquire:
                case memory_order_acq_rel:
                case memory_order_seq_cst:
                    CDS_COMPILER_RW_BARRIER;
                    break;
                default:;
            }
        }

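        // CDS_ITANIUM_ATOMIC_LOAD( n_bytes, n_bits ) generates load<n_bits>(): an atomic load
        // implemented with "ld<n_bytes>.acq" for every permitted order (relaxed, consume, acquire,
        // seq_cst), following the C++11-to-IA-64 mapping referenced at the top of this file.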
#define CDS_ITANIUM_ATOMIC_LOAD( n_bytes, n_bits )   \
        template <typename T>   \
        static inline T load##n_bits( T volatile const * pSrc, memory_order order ) CDS_NOEXCEPT \
        { \
            static_assert( sizeof(T) == n_bytes, "Illegal size of operand" )   ; \
            assert( order ==  memory_order_relaxed \
                || order ==  memory_order_consume  \
                || order ==  memory_order_acquire  \
                || order == memory_order_seq_cst   \
                ) ; \
            assert( pSrc )  ; \
            T val    ; \
            __asm__ __volatile__ ( \
                "ld" #n_bytes ".acq %[val] = [%[pSrc]]  \n\t" \
                : [val] "=r" (val) \
                : [pSrc] "r" (pSrc) \
                : "memory" \
                ) ; \
            return val ; \
        }

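        // CDS_ITANIUM_ATOMIC_STORE( n_bytes, n_bits ) generates store<n_bits>(): an atomic store
        // implemented with "st<n_bytes>.rel"; a seq_cst store additionally issues a trailing "mf".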
#define CDS_ITANIUM_ATOMIC_STORE( n_bytes, n_bits ) \
        template <typename T> \
        static inline void store##n_bits( T volatile * pDest, T val, memory_order order ) CDS_NOEXCEPT \
        { \
            static_assert( sizeof(T) == n_bytes, "Illegal size of operand" )   ; \
            assert( order ==  memory_order_relaxed \
                || order ==  memory_order_release  \
                || order == memory_order_seq_cst   \
                ) ; \
            assert( pDest )  ; \
            if ( order == memory_order_seq_cst ) { \
                __asm__ __volatile__ ( \
                    "st" #n_bytes ".rel [%[pDest]] = %[val] \n\t" \
                    "mf     \n\t" \
                    :: [pDest] "r" (pDest), [val] "r" (val) \
                    : "memory" \
                    ) ; \
            } \
            else { \
                __asm__ __volatile__ ( \
                    "st" #n_bytes ".rel [%[pDest]] = %[val] \n\t" \
                    :: [pDest] "r" (pDest), [val] "r" (val) \
                    : "memory" \
                    ) ; \
                fence_after(order) ; \
            } \
        }

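        // CDS_ITANIUM_ATOMIC_CAS( n_bytes, n_bits ) generates cas<n_bits>_strong()/cas<n_bits>_weak():
        // the expected value is loaded into the ar.ccv application register, then "cmpxchg<n_bytes>"
        // is issued with the .acq or .rel completer chosen from mo_success (acq_rel/seq_cst add a
        // trailing "mf"). On return `expected` holds the value actually read; the result is true
        // iff the exchange took place. The weak form simply forwards to the strong form.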
#define CDS_ITANIUM_ATOMIC_CAS( n_bytes, n_bits ) \
        template <typename T> \
        static inline bool cas##n_bits##_strong( T volatile * pDest, T& expected, T desired, memory_order mo_success, memory_order /*mo_fail*/ ) CDS_NOEXCEPT \
        { \
            static_assert( sizeof(T) == n_bytes, "Illegal size of operand" )   ; \
            T current ; \
            switch(mo_success) { \
            case memory_order_relaxed: \
            case memory_order_consume: \
            case memory_order_acquire: \
                __asm__ __volatile__ ( \
                    "mov ar.ccv = %[expected] ;;\n\t" \
                    "cmpxchg" #n_bytes ".acq %[current] = [%[pDest]], %[desired], ar.ccv\n\t" \
                    : [current] "=r" (current) \
                    : [pDest] "r" (pDest), [expected] "r" (expected), [desired] "r" (desired) \
                    : "ar.ccv", "memory" \
                    ); \
                break ; \
            case memory_order_release: \
                __asm__ __volatile__ ( \
                    "mov ar.ccv = %[expected] ;;\n\t" \
                    "cmpxchg" #n_bytes ".rel %[current] = [%[pDest]], %[desired], ar.ccv\n\t" \
                    : [current] "=r" (current) \
                    : [pDest] "r" (pDest), [expected] "r" (expected), [desired] "r" (desired) \
                    : "ar.ccv", "memory" \
                    ); \
                break ; \
            case memory_order_acq_rel: \
            case memory_order_seq_cst: \
                __asm__ __volatile__ ( \
                    "mov ar.ccv = %[expected] ;;\n\t" \
                    "cmpxchg" #n_bytes ".rel %[current] = [%[pDest]], %[desired], ar.ccv\n\t" \
                    "mf \n\t" \
                    : [current] "=r" (current) \
                    : [pDest] "r" (pDest), [expected] "r" (expected), [desired] "r" (desired) \
                    : "ar.ccv", "memory" \
                    ); \
                break; \
            default: \
                assert(false); \
            } \
            bool bSuccess = expected == current ; \
            expected = current ; \
            return bSuccess ; \
        } \
        template <typename T> \
        static inline bool cas##n_bits##_weak( T volatile * pDest, T& expected, T desired, memory_order mo_success, memory_order mo_fail ) CDS_NOEXCEPT \
        { return cas##n_bits##_strong( pDest, expected, desired, mo_success, mo_fail ); }

        // On IA-64 the xchg instruction always has acquire semantics; for release, acq_rel and
        // seq_cst orderings an "mf" is issued before the exchange.
#define CDS_ITANIUM_ATOMIC_EXCHANGE( n_bytes, n_bits ) \
        template <typename T> \
        static inline T exchange##n_bits( T volatile * pDest, T val, memory_order order ) CDS_NOEXCEPT \
        { \
            static_assert( sizeof(T) == n_bytes, "Illegal size of operand" )   ; \
            assert( pDest ) ; \
            T current ; \
            switch(order) \
            { \
            case memory_order_relaxed: \
            case memory_order_consume: \
            case memory_order_acquire: \
                __asm__ __volatile__ ( \
                    "xchg" #n_bytes " %[current] = [%[pDest]], %[val]\n\t" \
                    : [current] "=r" (current) \
                    : [pDest] "r" (pDest), [val] "r" (val) \
                    : "memory" \
                    ); \
                break; \
            case memory_order_acq_rel: \
            case memory_order_release: \
            case memory_order_seq_cst: \
                __asm__ __volatile__ ( \
                    "mf \n\t" \
                    "xchg" #n_bytes " %[current] = [%[pDest]], %[val]\n\t" \
                    : [current] "=r" (current) \
                    : [pDest] "r" (pDest), [val] "r" (val) \
                    : "memory" \
                    ); \
                break; \
            default: assert(false); \
            } \
            return current ; \
        }

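        // CDS_ITANIUM_ATOMIC_FETCH_ADD( n_bytes, n_add ) is a statement macro, not a function
        // generator: it expands inside the fetchNN_add/fetchNN_sub bodies below and expects
        // `cur`, `pDest` and `order` to be in scope. n_add must be one of the immediates accepted
        // by the IA-64 "fetchadd" instruction (+/-1, 4, 8, 16); other increments fall back to a CAS loop.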
#define CDS_ITANIUM_ATOMIC_FETCH_ADD( n_bytes, n_add )  \
        switch (order) { \
            case memory_order_relaxed: \
            case memory_order_consume: \
            case memory_order_acquire: \
                __asm__ __volatile__ ( \
                    "fetchadd" #n_bytes ".acq %[cur] = [%[pDest]], " #n_add " \n\t" \
                    : [cur] "=r" (cur) \
                    : [pDest] "r" (pDest) \
                    : "memory" \
                    ); \
                break ; \
            case memory_order_release: \
                __asm__ __volatile__ ( \
                    "fetchadd" #n_bytes ".rel %[cur] = [%[pDest]], " #n_add " \n\t" \
                    : [cur] "=r" (cur) \
                    : [pDest] "r" (pDest) \
                    : "memory" \
                    ); \
                break ; \
            case memory_order_acq_rel: \
            case memory_order_seq_cst: \
                __asm__ __volatile__ ( \
                    "fetchadd" #n_bytes ".rel %[cur] = [%[pDest]], " #n_add " \n\t" \
                    "mf \n\t" \
                    : [cur] "=r" (cur) \
                    : [pDest] "r" (pDest) \
                    : "memory" \
                    ); \
                break ; \
            default: \
                assert(false); \
        }

        //-----------------------------------------------------------------------------
        // 8bit primitives
        //-----------------------------------------------------------------------------

        CDS_ITANIUM_ATOMIC_LOAD( 1, 8 )
        CDS_ITANIUM_ATOMIC_STORE( 1, 8 )
        CDS_ITANIUM_ATOMIC_CAS( 1, 8 )
        CDS_ITANIUM_ATOMIC_EXCHANGE( 1, 8 )

        //-----------------------------------------------------------------------------
        // 16bit primitives
        //-----------------------------------------------------------------------------

        CDS_ITANIUM_ATOMIC_LOAD( 2, 16 )
        CDS_ITANIUM_ATOMIC_STORE( 2, 16 )
        CDS_ITANIUM_ATOMIC_CAS( 2, 16 )
        CDS_ITANIUM_ATOMIC_EXCHANGE( 2, 16 )

        //-----------------------------------------------------------------------------
        // 32bit primitives
        //-----------------------------------------------------------------------------

        CDS_ITANIUM_ATOMIC_LOAD( 4, 32 )
        CDS_ITANIUM_ATOMIC_STORE( 4, 32 )
        CDS_ITANIUM_ATOMIC_CAS( 4, 32 )
        CDS_ITANIUM_ATOMIC_EXCHANGE( 4, 32 )

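        // fetch32_add()/fetch32_sub() use fetchadd4 for the increments the hardware supports
        // directly (+/-1, 4, 8, 16) and fall back to a cas32_strong() loop for any other value.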
#       define CDS_ATOMIC_fetch32_add_defined
        template <typename T>
        static inline T fetch32_add( T volatile * pDest, T val, memory_order order) CDS_NOEXCEPT
        {
            static_assert( sizeof(T) == 4, "Illegal size of operand" );
            assert( pDest );

            T cur;
            switch ( val ) {
                case 1:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 4, 1 );
                    break;
                case 4:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 4, 4 );
                    break;
                case 8:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 4, 8 );
                    break;
                case 16:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 4, 16 );
                    break;
                default:
                    cur = load32( pDest, memory_order_relaxed );
                    do {} while ( !cas32_strong( pDest, cur, cur + val, order, memory_order_relaxed ));
                    break;
            }
            return cur;
        }

#       define CDS_ATOMIC_fetch32_sub_defined
        template <typename T>
        static inline T fetch32_sub( T volatile * pDest, T val, memory_order order) CDS_NOEXCEPT
        {
            static_assert( sizeof(T) == 4, "Illegal size of operand" );
            assert( pDest );
            T cur;
            switch ( val ) {
                case 1:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 4, -1 );
                    break;
                case 4:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 4, -4 );
                    break;
                case 8:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 4, -8 );
                    break;
                case 16:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 4, -16 );
                    break;
                default:
                    cur = load32( pDest, memory_order_relaxed );
                    do {} while ( !cas32_strong( pDest, cur, cur - val, order, memory_order_relaxed ));
                    break;
            }
            return cur;
        }

        //-----------------------------------------------------------------------------
        // 64bit primitives
        //-----------------------------------------------------------------------------

        CDS_ITANIUM_ATOMIC_LOAD( 8, 64 )
        CDS_ITANIUM_ATOMIC_STORE( 8, 64 )
        CDS_ITANIUM_ATOMIC_CAS( 8, 64 )
        CDS_ITANIUM_ATOMIC_EXCHANGE( 8, 64 )

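        // fetch64_add()/fetch64_sub() mirror the 32-bit versions, using fetchadd8 and a
        // cas64_strong() fallback loop.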
#       define CDS_ATOMIC_fetch64_add_defined
        template <typename T>
        static inline T fetch64_add( T volatile * pDest, T val, memory_order order) CDS_NOEXCEPT
        {
            static_assert( sizeof(T) == 8, "Illegal size of operand" );
            assert( pDest );

            T cur;
            switch ( val ) {
                case 1:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, 1 );
                    break;
                case 4:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, 4 );
                    break;
                case 8:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, 8 );
                    break;
                case 16:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, 16 );
                    break;
                default:
                    cur = load64( pDest, memory_order_relaxed );
                    do {} while ( !cas64_strong( pDest, cur, cur + val, order, memory_order_relaxed ));
                    break;
            }
            return cur;
        }

#       define CDS_ATOMIC_fetch64_sub_defined
        template <typename T>
        static inline T fetch64_sub( T volatile * pDest, T val, memory_order order) CDS_NOEXCEPT
        {
            static_assert( sizeof(T) == 8, "Illegal size of operand" );
            assert( pDest );
            T cur;
            switch ( val ) {
                case 1:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, -1 );
                    break;
                case 4:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, -4 );
                    break;
                case 8:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, -8 );
                    break;
                case 16:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, -16 );
                    break;
                default:
                    cur = load64( pDest, memory_order_relaxed );
                    do {} while ( !cas64_strong( pDest, cur, cur - val, order, memory_order_relaxed ));
                    break;
            }
            return cur;
        }

        //-----------------------------------------------------------------------------
        // pointer primitives
        //-----------------------------------------------------------------------------
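        // Pointer operations are the 64-bit primitives spelled out for T*: ld8.acq / st8.rel (+mf)
        // for load/store, cmpxchg8 for CAS and xchg8 for exchange.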
        template <typename T>
        static inline T * load_ptr( T * volatile const * pSrc, memory_order order ) CDS_NOEXCEPT
        {
            assert( order ==  memory_order_relaxed
                 || order ==  memory_order_consume
                 || order ==  memory_order_acquire
                 || order == memory_order_seq_cst
            );
            assert( pSrc );
            T * val;
            __asm__ __volatile__ (
                "ld8.acq %[val] = [%[pSrc]]  \n\t"
                : [val] "=r" (val)
                : [pSrc] "r" (pSrc)
                : "memory"
            );
            return val;
        }

        template <typename T>
        static inline void store_ptr( T * volatile * pDest, T * val, memory_order order ) CDS_NOEXCEPT
        {
            assert( order ==  memory_order_relaxed
                 || order ==  memory_order_release
                 || order == memory_order_seq_cst
            );
            assert( pDest );

            if ( order == memory_order_seq_cst ) {
                __asm__ __volatile__ (
                    "st8.rel [%[pDest]] = %[val] \n\t"
                    "mf     \n\t"
                    :: [pDest] "r" (pDest), [val] "r" (val)
                    : "memory"
                );
            }
            else {
                __asm__ __volatile__ (
                    "st8.rel [%[pDest]] = %[val] \n\t"
                    :: [pDest] "r" (pDest), [val] "r" (val)
                    : "memory"
                );
                fence_after(order);
            }
        }

        template <typename T>
        static inline bool cas_ptr_strong( T * volatile * pDest, T *& expected, T * desired, memory_order mo_success, memory_order mo_fail ) CDS_NOEXCEPT
        {
            static_assert( sizeof(T *) == 8, "Illegal size of operand" );
            assert( pDest );

            T * current;

            switch(mo_success) {
            case memory_order_relaxed:
            case memory_order_consume:
            case memory_order_acquire:
                __asm__ __volatile__ (
                    "mov ar.ccv = %[expected] ;;\n\t"
                    "cmpxchg8.acq %[current] = [%[pDest]], %[desired], ar.ccv\n\t"
                    : [current] "=r" (current)
                    : [pDest] "r" (pDest), [expected] "r" (expected), [desired] "r" (desired)
                    : "ar.ccv", "memory"
                );
                break;
            case memory_order_release:
                __asm__ __volatile__ (
                    "mov ar.ccv = %[expected] ;;\n\t"
                    "cmpxchg8.rel %[current] = [%[pDest]], %[desired], ar.ccv\n\t"
                    : [current] "=r" (current)
                    : [pDest] "r" (pDest), [expected] "r" (expected), [desired] "r" (desired)
                    : "ar.ccv", "memory"
                );
                break;
            case memory_order_acq_rel:
            case memory_order_seq_cst:
                __asm__ __volatile__ (
                    "mov ar.ccv = %[expected] ;;\n\t"
                    "cmpxchg8.rel %[current] = [%[pDest]], %[desired], ar.ccv\n\t"
                    "mf \n\t"
                    : [current] "=r" (current)
                    : [pDest] "r" (pDest), [expected] "r" (expected), [desired] "r" (desired)
                    : "ar.ccv", "memory"
                );
                break;
            default:
                assert(false);
            }

            bool bSuccess = expected == current;
            expected = current;
            if ( !bSuccess )
                fence_after( mo_fail );
            return bSuccess;
        }

        template <typename T>
        static inline bool cas_ptr_weak( T * volatile * pDest, T *& expected, T * desired, memory_order mo_success, memory_order mo_fail ) CDS_NOEXCEPT
        {
            return cas_ptr_strong( pDest, expected, desired, mo_success, mo_fail );
        }

        template <typename T>
        static inline T * exchange_ptr( T * volatile * pDest, T * val, memory_order order ) CDS_NOEXCEPT
        {
            static_assert( sizeof(T *) == 8, "Illegal size of operand" );
            assert( pDest );

            T * current;
            switch(order) {
                case memory_order_relaxed:
                case memory_order_consume:
                case memory_order_acquire:
                    __asm__ __volatile__ (
                        "xchg8 %[current] = [%[pDest]], %[val]\n\t"
                        : [current] "=r" (current)
                        : [pDest] "r" (pDest), [val] "r" (val)
                        : "memory"
                    );
                    break;
                case memory_order_acq_rel:
                case memory_order_release:
                case memory_order_seq_cst:
                    __asm__ __volatile__ (
                        "mf \n\t"
                        "xchg8 %[current] = [%[pDest]], %[val]\n\t"
                        : [current] "=r" (current)
                        : [pDest] "r" (pDest), [val] "r" (val)
                        : "memory"
                    );
                    break;
                default: assert(false);
            }
            return current;
        }


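        // atomic_pointer_sizeof gives the element size used for the pointer arithmetic in
        // fetch_ptr_add()/fetch_ptr_sub(); void is treated as a 1-byte element.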
        template <typename T> struct atomic_pointer_sizeof { enum { value = sizeof(T) }; };
        template <> struct atomic_pointer_sizeof<void> { enum { value = 1 }; };

        // Disabled: this specialization does not work properly -
        // atomic.fetch_add( ... ) returns nullptr, the cause is unknown
//#       define CDS_ATOMIC_fetch_ptr_add_defined
        template <typename T>
        static inline T * fetch_ptr_add( T * volatile * pDest, ptrdiff_t val, memory_order order) CDS_NOEXCEPT
        {
            static_assert( sizeof(T *) == 8, "Illegal size of operand" );
            assert( pDest );

            T * cur;
            val *= atomic_pointer_sizeof<T>::value;
            switch ( val ) {
                case 1:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, 1 );
                    break;
                case 4:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, 4 );
                    break;
                case 8:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, 8 );
                    break;
                case 16:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, 16 );
                    break;
                default:
                    cur = load_ptr( pDest, memory_order_relaxed );
                    do {} while ( !cas_ptr_strong( pDest, cur, reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(cur) + val), order, memory_order_relaxed ));
                    break;
            }
            return cur;
        }

        // Disabled: this specialization does not work properly -
        // atomic.fetch_sub( ... ) returns nullptr, the cause is unknown
//#       define CDS_ATOMIC_fetch_ptr_sub_defined
        template <typename T>
        static inline T * fetch_ptr_sub( T * volatile * pDest, ptrdiff_t val, memory_order order) CDS_NOEXCEPT
        {
            static_assert( sizeof(T *) == 8, "Illegal size of operand" );
            assert( pDest );
            T * cur;
            val *= atomic_pointer_sizeof<T>::value;
            switch ( val ) {
                case 1:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, -1 );
                    break;
                case 4:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, -4 );
                    break;
                case 8:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, -8 );
                    break;
                case 16:
                    CDS_ITANIUM_ATOMIC_FETCH_ADD( 8, -16 );
                    break;
                default:
                    cur = load_ptr( pDest, memory_order_relaxed );
                    do {} while ( !cas_ptr_strong( pDest, cur, reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(cur) - val), order, memory_order_relaxed ));
                    break;
            }
            return cur;
        }

        //-----------------------------------------------------------------------------
        // atomic flag primitives
        //-----------------------------------------------------------------------------

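        // atomic_flag is modelled as a bool: test-and-set is exchange8() of `true`,
        // clear is store8() of `false`.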
        typedef bool atomic_flag_type;
        static inline bool atomic_flag_tas( atomic_flag_type volatile * pFlag, memory_order order ) CDS_NOEXCEPT
        {
            return exchange8( pFlag, true, order );
        }

        static inline void atomic_flag_clear( atomic_flag_type volatile * pFlag, memory_order order ) CDS_NOEXCEPT
        {
            store8( pFlag, false, order );
        }

#undef CDS_ITANIUM_ATOMIC_LOAD
#undef CDS_ITANIUM_ATOMIC_STORE
#undef CDS_ITANIUM_ATOMIC_CAS
#undef CDS_ITANIUM_ATOMIC_EXCHANGE
#undef CDS_ITANIUM_ATOMIC_FETCH_ADD

    }} // namespace gcc::ia64

#ifndef CDS_CXX11_INLINE_NAMESPACE_SUPPORT
    using namespace gcc::ia64;
#endif
    }   // namespace platform
}}  // namespace cds::cxx11_atomic
//@endcond

#endif // #ifndef CDSLIB_COMPILER_GCC_IA64_CXX11_ATOMIC_H