Benefit of Cortex-M4F
Embedded-systems gurus have always repeated: don’t use floating-point numbers in your firmware! But what if you have a processor with an FPU? Well, then you simply can. Do not forget, however, to look carefully at your disassembly file, as always.
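So this is what we get with GCC 4.6.2 on a Cortex-M3, which has no FPU. Note that the single multiplication c = a * b turns into a call to the software floating-point routine __aeabi_fmul, whose body is listed below the call site: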
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
float a = 0.123123f; 8000350: 4b21 ldr r3, [pc, #132] ; (80003d8 <main+0xd8>) 8000352: 617b str r3, [r7, #20] float b = 0.123123f; 8000354: 4b20 ldr r3, [pc, #128] ; (80003d8 <main+0xd8>) 8000356: 613b str r3, [r7, #16] float c = 0.0f; 8000358: 4b20 ldr r3, [pc, #128] ; (80003dc <main+0xdc>) 800035a: 60fb str r3, [r7, #12] c = a * b; 800035c: 6978 ldr r0, [r7, #20] 800035e: 6939 ldr r1, [r7, #16] 8000360: f000 f8d6 bl 8000510 <__aeabi_fmul> 8000364: 4603 mov r3, r0 8000366: 60fb str r3, [r7, #12] b = c; 8000368: 68fb ldr r3, [r7, #12] 800036a: 613b str r3, [r7, #16] (...) 08000510 <__aeabi_fmul>: 8000510: f04f 0cff mov.w ip, #255 ; 0xff 8000514: ea1c 52d0 ands.w r2, ip, r0, lsr #23 8000518: bf1e ittt ne 800051a: ea1c 53d1 andsne.w r3, ip, r1, lsr #23 800051e: ea92 0f0c teqne r2, ip 8000522: ea93 0f0c teqne r3, ip 8000526: d06f beq.n 8000608 <__aeabi_fmul+0xf8> 8000528: 441a add r2, r3 800052a: ea80 0c01 eor.w ip, r0, r1 800052e: 0240 lsls r0, r0, #9 8000530: bf18 it ne 8000532: ea5f 2141 movsne.w r1, r1, lsl #9 8000536: d01e beq.n 8000576 <__aeabi_fmul+0x66> 8000538: f04f 6300 mov.w r3, #134217728 ; 0x8000000 800053c: ea43 1050 orr.w r0, r3, r0, lsr #5 8000540: ea43 1151 orr.w r1, r3, r1, lsr #5 8000544: fba0 3101 umull r3, r1, r0, r1 8000548: f00c 4000 and.w r0, ip, #2147483648 ; 0x80000000 800054c: f5b1 0f00 cmp.w r1, #8388608 ; 0x800000 8000550: bf3e ittt cc 8000552: 0049 lslcc r1, r1, #1 8000554: ea41 71d3 orrcc.w r1, r1, r3, lsr #31 8000558: 005b lslcc r3, r3, #1 800055a: ea40 0001 orr.w r0, r0, r1 800055e: f162 027f sbc.w r2, r2, #127 ; 0x7f 8000562: 2afd cmp r2, #253 ; 0xfd 8000564: d81d bhi.n 80005a2 <__aeabi_fmul+0x92> 8000566: f1b3 4f00 cmp.w r3, #2147483648 ; 0x80000000 800056a: eb40 50c2 adc.w r0, r0, r2, lsl #23 800056e: bf08 it eq 8000570: f020 0001 biceq.w r0, r0, #1 8000574: 4770 bx lr 8000576: f090 0f00 teq r0, #0 800057a: f00c 4c00 and.w ip, ip, #2147483648 ; 0x80000000 800057e: bf08 it eq 8000580: 0249 lsleq r1, r1, #9 8000582: ea4c 2050 orr.w 
r0, ip, r0, lsr #9 8000586: ea40 2051 orr.w r0, r0, r1, lsr #9 800058a: 3a7f subs r2, #127 ; 0x7f 800058c: bfc2 ittt gt 800058e: f1d2 03ff rsbsgt r3, r2, #255 ; 0xff 8000592: ea40 50c2 orrgt.w r0, r0, r2, lsl #23 8000596: 4770 bxgt lr 8000598: f440 0000 orr.w r0, r0, #8388608 ; 0x800000 800059c: f04f 0300 mov.w r3, #0 80005a0: 3a01 subs r2, #1 80005a2: dc5d bgt.n 8000660 <__aeabi_fmul+0x150> 80005a4: f112 0f19 cmn.w r2, #25 80005a8: bfdc itt le 80005aa: f000 4000 andle.w r0, r0, #2147483648 ; 0x80000000 80005ae: 4770 bxle lr 80005b0: f1c2 0200 rsb r2, r2, #0 80005b4: 0041 lsls r1, r0, #1 80005b6: fa21 f102 lsr.w r1, r1, r2 80005ba: f1c2 0220 rsb r2, r2, #32 80005be: fa00 fc02 lsl.w ip, r0, r2 80005c2: ea5f 0031 movs.w r0, r1, rrx 80005c6: f140 0000 adc.w r0, r0, #0 80005ca: ea53 034c orrs.w r3, r3, ip, lsl #1 80005ce: bf08 it eq 80005d0: ea20 70dc biceq.w r0, r0, ip, lsr #31 80005d4: 4770 bx lr 80005d6: f092 0f00 teq r2, #0 80005da: f000 4c00 and.w ip, r0, #2147483648 ; 0x80000000 80005de: bf02 ittt eq 80005e0: 0040 lsleq r0, r0, #1 80005e2: f410 0f00 tsteq.w r0, #8388608 ; 0x800000 80005e6: 3a01 subeq r2, #1 80005e8: d0f9 beq.n 80005de <__aeabi_fmul+0xce> 80005ea: ea40 000c orr.w r0, r0, ip 80005ee: f093 0f00 teq r3, #0 80005f2: f001 4c00 and.w ip, r1, #2147483648 ; 0x80000000 80005f6: bf02 ittt eq 80005f8: 0049 lsleq r1, r1, #1 80005fa: f411 0f00 tsteq.w r1, #8388608 ; 0x800000 80005fe: 3b01 subeq r3, #1 8000600: d0f9 beq.n 80005f6 <__aeabi_fmul+0xe6> 8000602: ea41 010c orr.w r1, r1, ip 8000606: e78f b.n 8000528 <__aeabi_fmul+0x18> 8000608: ea0c 53d1 and.w r3, ip, r1, lsr #23 800060c: ea92 0f0c teq r2, ip 8000610: bf18 it ne 8000612: ea93 0f0c teqne r3, ip 8000616: d00a beq.n 800062e <__aeabi_fmul+0x11e> 8000618: f030 4c00 bics.w ip, r0, #2147483648 ; 0x80000000 800061c: bf18 it ne 800061e: f031 4c00 bicsne.w ip, r1, #2147483648 ; 0x80000000 8000622: d1d8 bne.n 80005d6 <__aeabi_fmul+0xc6> 8000624: ea80 0001 eor.w r0, r0, r1 8000628: f000 4000 and.w r0, r0, 
#2147483648 ; 0x80000000 800062c: 4770 bx lr 800062e: f090 0f00 teq r0, #0 8000632: bf17 itett ne 8000634: f090 4f00 teqne r0, #2147483648 ; 0x80000000 8000638: 4608 moveq r0, r1 800063a: f091 0f00 teqne r1, #0 800063e: f091 4f00 teqne r1, #2147483648 ; 0x80000000 8000642: d014 beq.n 800066e <__aeabi_fmul+0x15e> 8000644: ea92 0f0c teq r2, ip 8000648: d101 bne.n 800064e <__aeabi_fmul+0x13e> 800064a: 0242 lsls r2, r0, #9 800064c: d10f bne.n 800066e <__aeabi_fmul+0x15e> 800064e: ea93 0f0c teq r3, ip 8000652: d103 bne.n 800065c <__aeabi_fmul+0x14c> 8000654: 024b lsls r3, r1, #9 8000656: bf18 it ne 8000658: 4608 movne r0, r1 800065a: d108 bne.n 800066e <__aeabi_fmul+0x15e> 800065c: ea80 0001 eor.w r0, r0, r1 8000660: f000 4000 and.w r0, r0, #2147483648 ; 0x80000000 8000664: f040 40fe orr.w r0, r0, #2130706432 ; 0x7f000000 8000668: f440 0000 orr.w r0, r0, #8388608 ; 0x800000 800066c: 4770 bx lr 800066e: f040 40fe orr.w r0, r0, #2130706432 ; 0x7f000000 8000672: f440 0040 orr.w r0, r0, #12582912 ; 0xc00000 8000676: 4770 bx lr |
And this is the output from the same GCC version on a Cortex-M4F (compiled with the flags -mcpu=cortex-m4 -mthumb -mfloat-abi=hard -mfpu=fpv4-sp-d16 -ffast-math -fsingle-precision-constant):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
float a = 0.123123f; 8000410: 4b3c ldr r3, [pc, #240] ; (8000504 ) 8000412: 617b str r3, [r7, #20] float b = 0.123123f; 8000414: 4b3b ldr r3, [pc, #236] ; (8000504 ) 8000416: 613b str r3, [r7, #16] float c = 0.0f; 8000418: 4b3b ldr r3, [pc, #236] ; (8000508 ) 800041a: 60fb str r3, [r7, #12] c = a * b; 800041c: ed97 7a05 vldr s14, [r7, #20] 8000420: edd7 7a04 vldr s15, [r7, #16] 8000424: ee67 7a27 vmul.f32 s15, s14, s15 8000428: edc7 7a03 vstr s15, [r7, #12] b = c; 800042c: 68fb ldr r3, [r7, #12] 800042e: 613b str r3, [r7, #16] |
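That is quite a difference, isn’t it? The whole call to __aeabi_fmul has been replaced by two register loads, a single vmul.f32 instruction and a store.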